Create ruff_notebook crate (#7039)

## Summary

This PR moves `ruff/jupyter` into its own `ruff_notebook` crate. Beyond
the move itself, there were a few challenges:

1. `ruff_notebook` relies on the source map abstraction. I've moved the
source map into `ruff_diagnostics`, since it doesn't have any
dependencies on its own and is used alongside diagnostics.
2. `ruff_notebook` has a couple of tests for end-to-end linting and
autofixing. I had to leave these tests in `ruff` itself.
3. We had code in `ruff/jupyter` that relied on Python lexing, in order
to provide a more targeted error message in the event that a user saves
a `.py` file with a `.ipynb` extension. I removed this in order to avoid
a dependency on the parser; it felt like it wasn't worth retaining just
for that dependency.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-09-01 14:56:44 +01:00 committed by GitHub
parent 08e246764f
commit afcd00da56
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
48 changed files with 274 additions and 253 deletions

View file

@ -0,0 +1,24 @@
/// Jupyter Notebook indexing table
///
/// When we lint a jupyter notebook, we have to translate the row/column based on
/// [`ruff_text_size::TextSize`] to jupyter notebook cell/row/column.
///
/// Both tables are indexed directly by the row number, and the lookup methods
/// on this type return `None` for a row that lies beyond the end of a table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NotebookIndex {
/// Enter a row (1-based), get back the cell (1-based)
pub(super) row_to_cell: Vec<u32>,
/// Enter a row (1-based), get back the row in cell (1-based)
pub(super) row_to_row_in_cell: Vec<u32>,
}
impl NotebookIndex {
    /// Look up the cell number (1-based) that the given row (1-based) belongs to.
    ///
    /// Returns `None` when `row` is past the end of the table.
    pub fn cell(&self, row: usize) -> Option<u32> {
        let Self { row_to_cell, .. } = self;
        row_to_cell.get(row).copied()
    }

    /// Look up the row number (1-based) inside its cell for the given
    /// row (1-based).
    ///
    /// Returns `None` when `row` is past the end of the table.
    pub fn cell_row(&self, row: usize) -> Option<u32> {
        let Self {
            row_to_row_in_cell, ..
        } = self;
        row_to_row_in_cell.get(row).copied()
    }
}

View file

@ -0,0 +1,9 @@
//! Utils for reading and writing jupyter notebooks
pub use index::*;
pub use notebook::*;
pub use schema::*;
mod index;
mod notebook;
mod schema;

View file

@ -0,0 +1,523 @@
use std::cmp::Ordering;
use std::fmt::Display;
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
use std::path::Path;
use std::{io, iter};
use itertools::Itertools;
use once_cell::sync::OnceCell;
use serde::Serialize;
use serde_json::error::Category;
use thiserror::Error;
use uuid::Uuid;
use ruff_diagnostics::{SourceMap, SourceMarker};
use ruff_source_file::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_text_size::TextSize;
use crate::index::NotebookIndex;
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
/// Read the notebook at `path`, push its own concatenated source back into
/// its cells, and return the re-serialized notebook as a string.
///
/// ## Errors
///
/// Returns an error if the notebook cannot be read, if writing it out fails,
/// or if the written bytes are not valid UTF-8.
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
    let mut notebook = match Notebook::from_path(path) {
        Ok(notebook) => notebook,
        Err(err) => {
            return Err(anyhow::anyhow!(
                "Failed to read notebook file `{}`: {:?}",
                path.display(),
                err
            ));
        }
    };

    // Feed the unmodified source back in so every valid code cell is rewritten
    // from the concatenated representation.
    let code = notebook.source_code().to_owned();
    notebook.update_cell_content(&code);

    let mut buffer = Vec::new();
    notebook.write(&mut buffer)?;
    let serialized = String::from_utf8(buffer)?;
    Ok(serialized)
}
impl Display for SourceValue {
    /// Write the source text as-is: a single string is written directly, and
    /// an array of strings is written back-to-back with no separator.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            SourceValue::StringArray(parts) => {
                parts.iter().try_for_each(|part| f.write_str(part))
            }
            SourceValue::String(text) => f.write_str(text),
        }
    }
}
impl Cell {
    /// Borrow the [`SourceValue`] of the cell, whatever its cell type.
    fn source(&self) -> &SourceValue {
        match self {
            Cell::Raw(raw) => &raw.source,
            Cell::Markdown(markdown) => &markdown.source,
            Cell::Code(code) => &code.source,
        }
    }

    /// Replace the [`SourceValue`] of the cell, whatever its cell type.
    fn set_source(&mut self, source: SourceValue) {
        let slot = match self {
            Cell::Raw(raw) => &mut raw.source,
            Cell::Markdown(markdown) => &mut markdown.source,
            Cell::Code(code) => &mut code.source,
        };
        *slot = source;
    }

    /// Return `true` if it's a valid code cell.
    ///
    /// A cell is valid when it is a [`Cell::Code`] and none of its lines
    /// starts (after leading whitespace) with the cell magic marker `%%`.
    fn is_valid_code_cell(&self) -> bool {
        let Cell::Code(code) = self else {
            return false;
        };

        // Cell magic acts on the entire cell, unlike line magic which only
        // affects a single line, so such cells are skipped as a whole.
        let is_cell_magic = |line: &str| line.trim_start().starts_with("%%");
        let has_cell_magic = match &code.source {
            SourceValue::StringArray(lines) => lines.iter().any(|line| is_cell_magic(line)),
            SourceValue::String(text) => text.lines().any(is_cell_magic),
        };
        !has_cell_magic
    }
}
/// An error that can occur while deserializing a Jupyter Notebook.
#[derive(Error, Debug)]
pub enum NotebookError {
/// An I/O failure, e.g. while opening the file or rewinding the reader.
#[error(transparent)]
Io(#[from] io::Error),
/// A `serde_json` error that was classified as [`Category::Io`].
#[error(transparent)]
Json(serde_json::Error),
/// A `serde_json` error classified as a syntax error or an unexpected end of input.
#[error("Expected a Jupyter Notebook, which must be internally stored as JSON, but this file isn't valid JSON: {0}")]
InvalidJson(serde_json::Error),
/// A `serde_json` error classified as a data error, i.e. the JSON does not fit the schema types.
#[error("This file does not match the schema expected of Jupyter Notebooks: {0}")]
InvalidSchema(serde_json::Error),
/// The notebook's major format number (`nbformat`) is not 4; carries the value found.
#[error("Expected Jupyter Notebook format 4, found: {0}")]
InvalidFormat(i64),
}
/// A deserialized Jupyter notebook together with the data derived from it:
/// the concatenated source of its valid code cells, the offsets of those
/// cells within that source, and a lazily built row index.
#[derive(Clone, Debug, PartialEq)]
pub struct Notebook {
/// Python source code of the notebook.
///
/// This is the concatenation of all valid code cells in the notebook
/// separated by a newline and a trailing newline. The trailing newline
/// is added to make sure that each cell ends with a newline which will
/// be removed when updating the cell content.
source_code: String,
/// The index of the notebook. This is used to map between the concatenated
/// source code and the original notebook.
///
/// It is built on first access and cleared whenever the notebook is updated.
index: OnceCell<NotebookIndex>,
/// The raw notebook i.e., the deserialized version of JSON string.
raw: RawNotebook,
/// The offsets of each cell in the concatenated source code. This includes
/// the first and last character offsets as well.
///
/// Consecutive pairs of offsets delimit the content of one valid code cell.
cell_offsets: Vec<TextSize>,
/// The cell index of all valid code cells in the notebook.
///
/// These are 0-based positions into the raw notebook's `cells`.
valid_code_cells: Vec<u32>,
/// Flag to indicate if the JSON string of the notebook has a trailing newline.
trailing_newline: bool,
}
impl Notebook {
/// Open the file at the given [`Path`] and read a Jupyter Notebook from it.
///
/// ## Errors
///
/// Returns [`NotebookError::Io`] if the file cannot be opened, or any error
/// produced while reading and validating the notebook.
pub fn from_path(path: &Path) -> Result<Self, NotebookError> {
    let file = File::open(path)?;
    let reader = BufReader::new(file);
    Self::from_reader(reader)
}
/// Read a Jupyter Notebook from an in-memory JSON string.
///
/// ## Errors
///
/// Returns any error produced while reading and validating the notebook.
pub fn from_source_code(source_code: &str) -> Result<Self, NotebookError> {
    let reader = Cursor::new(source_code);
    Self::from_reader(reader)
}
/// Read a Jupyter Notebook from a [`Read`] implementor.
///
/// The reader must also be [`Seek`]: its last byte is inspected first to
/// remember whether the input ends with a newline, then the reader is
/// rewound and the whole input is deserialized as JSON.
///
/// See also the black implementation
/// <https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#L1017-L1046>
///
/// ## Errors
///
/// * [`NotebookError::Io`] if rewinding the reader fails.
/// * [`NotebookError::Json`], [`NotebookError::InvalidJson`] or
///   [`NotebookError::InvalidSchema`], depending on how `serde_json`
///   classifies a deserialization failure.
/// * [`NotebookError::InvalidFormat`] if `nbformat` is not 4.
///
/// ## Panics
///
/// Panics if the position of a valid code cell does not fit in a `u32`.
fn from_reader<R>(mut reader: R) -> Result<Self, NotebookError>
where
    R: Read + Seek,
{
    // If seeking to or reading the last byte fails, the input is treated as
    // having no trailing newline.
    let trailing_newline = reader.seek(SeekFrom::End(-1)).is_ok_and(|_| {
        let mut buf = [0; 1];
        reader.read_exact(&mut buf).is_ok_and(|_| buf[0] == b'\n')
    });
    reader.rewind()?;

    // Translate the deserialization error into a diagnostic.
    let mut raw_notebook: RawNotebook =
        serde_json::from_reader(reader.by_ref()).map_err(|err| match err.classify() {
            Category::Io => NotebookError::Json(err),
            Category::Syntax | Category::Eof => NotebookError::InvalidJson(err),
            // We could try to read the schema version here but if this fails it's
            // a bug anyway.
            Category::Data => NotebookError::InvalidSchema(err),
        })?;

    // v4 is what everybody uses
    if raw_notebook.nbformat != 4 {
        // bail because we should have already failed at the json schema stage
        return Err(NotebookError::InvalidFormat(raw_notebook.nbformat));
    }

    let valid_code_cells = raw_notebook
        .cells
        .iter()
        .enumerate()
        .filter(|(_, cell)| cell.is_valid_code_cell())
        .map(|(idx, _)| u32::try_from(idx).unwrap())
        .collect::<Vec<_>>();

    let mut contents = Vec::with_capacity(valid_code_cells.len());
    let mut current_offset = TextSize::from(0);
    // One offset per valid code cell plus the leading offset at 0.
    let mut cell_offsets = Vec::with_capacity(valid_code_cells.len() + 1);
    cell_offsets.push(TextSize::from(0));

    for &idx in &valid_code_cells {
        let cell_contents = match raw_notebook.cells[idx as usize].source() {
            SourceValue::String(string) => string.clone(),
            SourceValue::StringArray(string_array) => string_array.join(""),
        };
        // The extra 1 accounts for the newline that follows every cell in the
        // concatenated source.
        current_offset += TextSize::of(&cell_contents) + TextSize::new(1);
        contents.push(cell_contents);
        cell_offsets.push(current_offset);
    }

    // Add cell ids to 4.5+ notebooks if they are missing
    // https://github.com/astral-sh/ruff/issues/6834
    // https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#required-field
    //
    // `nbformat` is known to be 4 at this point, so only the minor version
    // needs to be checked.
    if raw_notebook.nbformat_minor >= 5 {
        for cell in &mut raw_notebook.cells {
            let id = match cell {
                Cell::Code(cell) => &mut cell.id,
                Cell::Markdown(cell) => &mut cell.id,
                Cell::Raw(cell) => &mut cell.id,
            };
            if id.is_none() {
                // https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions
                *id = Some(Uuid::new_v4().to_string());
            }
        }
    }

    Ok(Self {
        raw: raw_notebook,
        index: OnceCell::new(),
        // The additional newline at the end is to maintain consistency for
        // all cells. These newlines will be removed before updating the
        // source code with the transformed content. Refer `update_cell_content`.
        source_code: contents.join("\n") + "\n",
        cell_offsets,
        valid_code_cells,
        trailing_newline,
    })
}
/// Shift every cell offset according to the given [`SourceMap`].
///
/// Each offset is moved by the difference between the destination and the
/// source position of the closest marker whose source position is not
/// greater than the offset.
fn update_cell_offsets(&mut self, source_map: &SourceMap) {
    // Consecutive cells without edits share the same closest marker, so the
    // marker found for the previous offset is remembered and reused as long
    // as it still applies.
    let mut cached_marker: Option<&SourceMarker> = None;

    // Offsets are visited from last to first; the leading offset is always 0
    // and is never touched.
    for offset in self.cell_offsets.iter_mut().skip(1).rev() {
        let reusable = cached_marker.filter(|marker| marker.source() <= *offset);
        let marker = if let Some(marker) = reusable {
            marker
        } else {
            let lookup = source_map
                .markers()
                .iter()
                .rev()
                .find(|marker| marker.source() <= *offset);
            let Some(found) = lookup else {
                // No marker applies to this offset, so none applies to the
                // remaining offsets either.
                break;
            };
            cached_marker = Some(found);
            found
        };

        let (source, dest) = (marker.source(), marker.dest());
        match source.cmp(&dest) {
            Ordering::Equal => {}
            Ordering::Less => *offset += dest - source,
            Ordering::Greater => *offset -= source - dest,
        }
    }
}
/// Rewrite the source of every valid code cell from its slice of the
/// transformed content.
///
/// Each cell is stored as a [`SourceValue::StringArray`] with one entry per
/// line, line endings included.
///
/// ## Panics
///
/// Panics if the transformed content is out of bounds for any cell. This
/// can happen only if the cell offsets were not updated before calling
/// this method or the offsets were updated incorrectly.
fn update_cell_content(&mut self, transformed: &str) {
    let cell_ranges = self.cell_offsets.iter().tuple_windows::<(_, _)>();
    for (&idx, (start, end)) in self.valid_code_cells.iter().zip(cell_ranges) {
        let Some(cell_content) = transformed.get(start.to_usize()..end.to_usize()) else {
            panic!("Transformed content out of bounds ({start:?}..{end:?}) for cell at {idx:?}");
        };

        // Only the trailing newline that was added while concatenating the
        // cell contents needs to be stripped.
        let without_separator = cell_content.strip_suffix('\n').unwrap_or(cell_content);
        let lines: Vec<String> = UniversalNewlineIterator::from(without_separator)
            .map(|line| line.as_full_str().to_string())
            .collect();

        self.raw.cells[idx as usize].set_source(SourceValue::StringArray(lines));
    }
}
/// Build and return the [`NotebookIndex`].
///
/// ## Notes
///
/// Empty cells don't have any newlines, but there's a single visible line
/// in the UI. That single line needs to be accounted for.
///
/// In case of [`SourceValue::StringArray`], newlines are part of the strings.
/// So, to get the actual count of lines, we need to check for any trailing
/// newline for the last line.
///
/// For example, consider the following cell:
/// ```python
/// [
///    "import os\n",
///    "import sys\n",
/// ]
/// ```
///
/// Here, the array suggests that there are two lines, but the actual number
/// of lines visible in the UI is three. The same goes for [`SourceValue::String`]
/// where we need to check for the trailing newline.
///
/// The index building is expensive as it needs to go through the content of
/// every valid code cell.
///
/// ## Panics
///
/// Panics if the number of lines in a cell does not fit in a `u32`.
fn build_index(&self) -> NotebookIndex {
    // Rows are 1-based, so slot 0 of both tables is a placeholder.
    let mut row_to_cell = vec![0];
    let mut row_to_row_in_cell = vec![0];

    for &idx in &self.valid_code_cells {
        let line_count = match self.raw.cells[idx as usize].source() {
            // An empty cell still occupies one row.
            SourceValue::String(string) if string.is_empty() => 1,
            SourceValue::String(string) => {
                u32::try_from(NewlineWithTrailingNewline::from(string).count()).unwrap()
            }
            SourceValue::StringArray(string_array) => match string_array.last() {
                // An empty cell still occupies one row.
                None => 1,
                // A trailing newline on the last entry adds one more row.
                Some(last) => {
                    let trailing_newline = usize::from(last.ends_with('\n'));
                    u32::try_from(string_array.len() + trailing_newline).unwrap()
                }
            },
        };

        // Cell numbers are reported 1-based, hence `idx + 1`.
        row_to_cell.extend(iter::repeat(idx + 1).take(line_count as usize));
        row_to_row_in_cell.extend(1..=line_count);
    }

    NotebookIndex {
        row_to_cell,
        row_to_row_in_cell,
    }
}
/// Return the notebook content as a single string.
///
/// This is the concatenation of all valid Python code cells.
pub fn source_code(&self) -> &str {
    self.source_code.as_str()
}
/// Return the Jupyter notebook index, building it on first use.
///
/// The index is built only once when required. This is only used to
/// report diagnostics, so by that time all of the autofixes must have
/// been applied if `--fix` was passed.
pub fn index(&self) -> &NotebookIndex {
    let build = || self.build_index();
    self.index.get_or_init(build)
}
/// Return the cell offsets for the concatenated source code corresponding
/// to the Jupyter notebook.
pub fn cell_offsets(&self) -> &[TextSize] {
    self.cell_offsets.as_slice()
}
/// Return `true` if the notebook has a trailing newline, `false` otherwise.
pub fn trailing_newline(&self) -> bool {
self.trailing_newline
}
/// Update the notebook with the given sourcemap and transformed content.
///
/// `transformed` becomes the new concatenated source code, and the source of
/// every valid code cell is rewritten from its slice of it.
pub fn update(&mut self, source_map: &SourceMap, transformed: String) {
// Clear the cached index so that it is rebuilt from the updated cells the
// next time it is requested.
self.index.take();
// Cell offsets must be updated before updating the cell content as
// it depends on the offsets to extract the cell content.
self.update_cell_offsets(source_map);
self.update_cell_content(&transformed);
self.source_code = transformed;
}
/// Return all cells of the Jupyter notebook as a slice of [`Cell`], in the
/// order they are stored in the raw notebook.
pub fn cells(&self) -> &[Cell] {
    self.raw.cells.as_slice()
}
/// Return `true` if the notebook is a Python notebook, `false` otherwise.
///
/// A notebook without any `language_info` metadata is treated as a Python
/// notebook; otherwise the language name must be exactly `python`.
pub fn is_python_notebook(&self) -> bool {
    match &self.raw.metadata.language_info {
        Some(language) => language.name == "python",
        None => true,
    }
}
/// Serialize the notebook as pretty-printed JSON with alphabetically sorted
/// keys into the given [`Write`] implementor.
///
/// A final newline is written only if the notebook had one when it was read.
///
/// ## Errors
///
/// Returns an error if serialization or writing to `writer` fails.
pub fn write(&self, writer: &mut dyn Write) -> anyhow::Result<()> {
    // https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
    let mut serializer = serde_json::Serializer::with_formatter(
        writer,
        serde_json::ser::PrettyFormatter::with_indent(b" "),
    );
    SortAlphabetically(&self.raw).serialize(&mut serializer)?;

    if !self.trailing_newline {
        return Ok(());
    }
    writeln!(serializer.into_inner())?;
    Ok(())
}
}
// Unit tests for notebook reading, cell classification and index building.
// They read fixture files relative to `./resources/test/fixtures/jupyter`.
#[cfg(test)]
mod tests {
use std::path::Path;
use anyhow::Result;
use test_case::test_case;
use crate::{Cell, Notebook, NotebookError, NotebookIndex};
/// Construct a path to a Jupyter notebook in the `resources/test/fixtures/jupyter` directory.
fn notebook_path(path: impl AsRef<Path>) -> std::path::PathBuf {
Path::new("./resources/test/fixtures/jupyter").join(path)
}
/// The `valid.ipynb` fixture must be recognized as a Python notebook.
#[test]
fn test_python() -> Result<(), NotebookError> {
let notebook = Notebook::from_path(&notebook_path("valid.ipynb"))?;
assert!(notebook.is_python_notebook());
Ok(())
}
/// The `R.ipynb` fixture must not be recognized as a Python notebook.
#[test]
fn test_r() -> Result<(), NotebookError> {
let notebook = Notebook::from_path(&notebook_path("R.ipynb"))?;
assert!(!notebook.is_python_notebook());
Ok(())
}
/// Unreadable fixtures must map to the matching [`NotebookError`] variant.
#[test]
fn test_invalid() {
assert!(matches!(
Notebook::from_path(&notebook_path("invalid_extension.ipynb")),
Err(NotebookError::InvalidJson(_))
));
assert!(matches!(
Notebook::from_path(&notebook_path("not_json.ipynb")),
Err(NotebookError::InvalidJson(_))
));
assert!(matches!(
Notebook::from_path(&notebook_path("wrong_schema.ipynb")),
Err(NotebookError::InvalidSchema(_))
));
}
/// Each case names a single-cell fixture and whether that cell is expected to
/// count as a valid code cell.
#[test_case(Path::new("markdown.json"), false; "markdown")]
#[test_case(Path::new("only_magic.json"), true; "only_magic")]
#[test_case(Path::new("code_and_magic.json"), true; "code_and_magic")]
#[test_case(Path::new("only_code.json"), true; "only_code")]
#[test_case(Path::new("cell_magic.json"), false; "cell_magic")]
fn test_is_valid_code_cell(path: &Path, expected: bool) -> Result<()> {
/// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
let path = notebook_path("cell").join(path);
let source_code = std::fs::read_to_string(path)?;
Ok(serde_json::from_str(&source_code)?)
}
assert_eq!(read_jupyter_cell(path)?.is_valid_code_cell(), expected);
Ok(())
}
/// Checks the concatenated source, the row index and the cell offsets that
/// are derived from the `valid.ipynb` fixture.
#[test]
fn test_concat_notebook() -> Result<(), NotebookError> {
let notebook = Notebook::from_path(&notebook_path("valid.ipynb"))?;
assert_eq!(
notebook.source_code,
r#"def unused_variable():
x = 1
y = 2
print(f"cell one: {y}")
unused_variable()
def mutable_argument(z=set()):
print(f"cell two: {z}")
mutable_argument()
print("after empty cells")
"#
);
// Slot 0 of both tables is the placeholder for the non-existent row 0.
assert_eq!(
notebook.index(),
&NotebookIndex {
row_to_cell: vec![0, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 5, 7, 7, 8],
row_to_row_in_cell: vec![0, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 1, 1, 2, 1],
}
);
assert_eq!(
notebook.cell_offsets(),
&[
0.into(),
90.into(),
168.into(),
169.into(),
171.into(),
198.into()
]
);
Ok(())
}
}

View file

@ -0,0 +1,212 @@
//! The JSON schema of a Jupyter Notebook, entrypoint is [`RawNotebook`]
//!
//! Generated by <https://app.quicktype.io/> from
//! <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
//! Jupyter Notebook v4.5 JSON schema.
//!
//! The following changes were made to the generated version:
//! * Only keep the required structs and enums.
//! * `Cell::id` is optional because it wasn't required <v4.5
//! * `#[serde(deny_unknown_fields)]` was added where the schema had
//! `"additionalProperties": false`
//! * `#[serde(flatten)] pub extra: BTreeMap<String, Value>` for
//! `"additionalProperties": true` as preparation for round-trip support.
//! * `#[skip_serializing_none]` (from `serde_with`) was added to structs where
//! one or more fields were optional to avoid serializing `null` values. The
//! exception is `CodeCell`, where only `id` is skipped when it is `None`.
//! * `execution_count` is a required property only for code cells, so it is
//! declared on `CodeCell` alone. `CodeCell` does not use
//! `#[skip_serializing_none]`, so `execution_count` is serialized even when
//! it is `None` (as `null`). The cell type itself is selected through the
//! `cell_type` tag of the `Cell` enum.
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_with::skip_serializing_none;
fn sort_alphabetically<T: Serialize, S: serde::Serializer>(
value: &T,
serializer: S,
) -> Result<S::Ok, S::Error> {
let value = serde_json::to_value(value).map_err(serde::ser::Error::custom)?;
value.serialize(serializer)
}
/// This is used to serialize any value implementing [`Serialize`] alphabetically.
///
/// The reason for this is to maintain consistency in the generated JSON string,
/// which is useful for diffing. The default serializer keeps the order of the
/// fields as they are defined in the struct, which will not be consistent when
/// there are `extra` fields.
///
/// The wrapped value is serialized through the `sort_alphabetically` function.
///
/// # Example
///
/// ```
/// use std::collections::BTreeMap;
///
/// use serde::Serialize;
///
/// use ruff_notebook::SortAlphabetically;
///
/// #[derive(Serialize)]
/// struct MyStruct {
/// a: String,
/// #[serde(flatten)]
/// extra: BTreeMap<String, String>,
/// b: String,
/// }
///
/// let my_struct = MyStruct {
/// a: "a".to_string(),
/// extra: BTreeMap::from([
/// ("d".to_string(), "d".to_string()),
/// ("c".to_string(), "c".to_string()),
/// ]),
/// b: "b".to_string(),
/// };
///
/// let serialized = serde_json::to_string_pretty(&SortAlphabetically(&my_struct)).unwrap();
/// assert_eq!(
/// serialized,
/// r#"{
/// "a": "a",
/// "b": "b",
/// "c": "c",
/// "d": "d"
/// }"#
/// );
/// ```
#[derive(Serialize)]
pub struct SortAlphabetically<T: Serialize>(#[serde(serialize_with = "sort_alphabetically")] pub T);
/// The root of the JSON of a Jupyter Notebook
///
/// Generated by <https://app.quicktype.io/> from
/// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
/// Jupyter Notebook v4.5 JSON schema.
///
/// Unknown top-level fields are rejected during deserialization
/// (`deny_unknown_fields`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct RawNotebook {
/// Array of cells of the current notebook.
pub cells: Vec<Cell>,
/// Notebook root-level metadata.
pub metadata: RawNotebookMetadata,
/// Notebook format (major number). Incremented between backwards incompatible changes to the
/// notebook format.
pub nbformat: i64,
/// Notebook format (minor number). Incremented for backward compatible changes to the
/// notebook format.
pub nbformat_minor: i64,
}
/// A single notebook cell.
///
/// The JSON `cell_type` field is the string identifying the type of cell and
/// selects the variant: `"code"`, `"markdown"` or `"raw"`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "cell_type")]
pub enum Cell {
/// A code cell (`"cell_type": "code"`).
#[serde(rename = "code")]
Code(CodeCell),
/// A markdown cell (`"cell_type": "markdown"`).
#[serde(rename = "markdown")]
Markdown(MarkdownCell),
/// A raw cell (`"cell_type": "raw"`).
#[serde(rename = "raw")]
Raw(RawCell),
}
/// Notebook raw nbconvert cell.
///
/// Optional fields that are `None` are omitted when serializing, and unknown
/// fields are rejected when deserializing.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct RawCell {
/// The optional `attachments` entry of the cell, kept as untyped JSON.
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
/// Contents of the cell, as a single string or an array of strings.
pub source: SourceValue,
}
/// Notebook markdown cell.
///
/// Optional fields that are `None` are omitted when serializing, and unknown
/// fields are rejected when deserializing.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MarkdownCell {
/// The optional `attachments` entry of the cell, kept as untyped JSON.
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
/// Contents of the cell, as a single string or an array of strings.
pub source: SourceValue,
}
/// Notebook code cell.
///
/// Unlike the other cell structs, this one does not use
/// `#[skip_serializing_none]`: only `id` is skipped when it is `None`, so
/// `execution_count` is still serialized when it is `None`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct CodeCell {
/// The code cell's prompt number. Will be null if the cell has not been run.
pub execution_count: Option<i64>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
/// Execution, display, or stream outputs.
pub outputs: Vec<Value>,
/// Contents of the cell, as a single string or an array of strings.
pub source: SourceValue,
}
/// Notebook root-level metadata.
///
/// Optional fields that are `None` are omitted when serializing. Fields that
/// are not listed here are collected into `extra`.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct RawNotebookMetadata {
/// The author(s) of the notebook document
pub authors: Option<Value>,
/// Kernel information.
pub kernelspec: Option<Value>,
/// Information about the language of the notebook, see [`LanguageInfo`].
pub language_info: Option<LanguageInfo>,
/// Original notebook format (major number) before converting the notebook between versions.
/// This should never be written to a file.
pub orig_nbformat: Option<i64>,
/// The title of the notebook document
pub title: Option<String>,
/// For additional properties.
#[serde(flatten)]
pub extra: BTreeMap<String, Value>,
}
/// Kernel information.
///
/// Describes the language of the notebook; `name` is the only required field.
/// Optional fields that are `None` are omitted when serializing, and fields
/// that are not listed here are collected into `extra`.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LanguageInfo {
/// The codemirror mode to use for code in this language.
pub codemirror_mode: Option<Value>,
/// The file extension for files in this language.
pub file_extension: Option<String>,
/// The mimetype corresponding to files in this language.
pub mimetype: Option<String>,
/// The programming language which this kernel runs.
pub name: String,
/// The pygments lexer to use for code in this language.
pub pygments_lexer: Option<String>,
/// For additional properties.
#[serde(flatten)]
pub extra: BTreeMap<String, Value>,
}
/// mimetype output (e.g. text/plain), represented as either an array of strings or a
/// string.
///
/// Contents of the cell, represented as an array of lines.
///
/// The stream's text output, represented as an array of strings.
///
/// The enum is untagged: a JSON string deserializes into `String` and a JSON
/// array of strings into `StringArray`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum SourceValue {
/// The whole text as one string.
String(String),
/// The text split into multiple strings.
StringArray(Vec<String>),
}