Mirror of https://github.com/astral-sh/ruff.git (synced 2025-10-07 17:10:31 +00:00)
Maintain consistency when deserializing to JSON (#5114)
## Summary

Maintain consistency while serializing a Jupyter notebook to JSON. The following changes were made:

1. Use a string array to store the source value, as that's the nbformat default (`nbformat/v4/nbjson.py` L56-L57 at `5781720423`).
2. Remove unused structs and enums.
3. Reorder the keys alphabetically, as that's the nbformat default (`nbformat/v4/nbjson.py` L51 at `5781720423`).

### Side effect

Removing the `preserve_order` feature means that the keys in JSON output (`--format json`) will be in alphabetical order. This is because the value is represented using `serde_json::Value`, which is internally a `BTreeMap` and is therefore sorted by its string keys. For posterity: if this turns out to be not ideal, we could define a struct representing the JSON object, and the order of the struct fields would then determine the order of the keys in the JSON string.

## Test Plan

Add a test case asserting the raw JSON string.
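The two serialization choices above can be illustrated with a small, self-contained sketch. This is not the code from this commit (the commit's own `SortAlphabetically` wrapper appears in the schema changes below); it only assumes `serde` with the `derive` feature and `serde_json` built *without* `preserve_order`, and the struct and field names are made up for illustration:

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Example {
    z_field: i32,
    a_field: i32,
}

fn main() -> serde_json::Result<()> {
    let example = Example { z_field: 1, a_field: 2 };

    // Serializing the struct directly keeps the field declaration order.
    assert_eq!(serde_json::to_string(&example)?, r#"{"z_field":1,"a_field":2}"#);

    // Round-tripping through `serde_json::Value` first (the idea behind the
    // `SortAlphabetically` wrapper in this commit) yields alphabetical key
    // order, because `Value`'s object map is a `BTreeMap` when the
    // `preserve_order` feature is disabled.
    let value = serde_json::to_value(&example)?;
    assert_eq!(serde_json::to_string(&value)?, r#"{"a_field":2,"z_field":1}"#);

    // nbformat's default on-disk representation stores a cell's source as an
    // array of lines, each keeping its trailing newline; `split_inclusive`
    // approximates that split here.
    let source: Vec<&str> = "import math\n\nmath.pi".split_inclusive('\n').collect();
    assert_eq!(source, ["import math\n", "\n", "math.pi"]);

    Ok(())
}
```

The `Cargo.toml` change below drops the `preserve_order` feature, which is what makes the `BTreeMap`-backed ordering the behavior in effect.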
parent 94abf7f088
commit 48f4f2d63d
15 changed files with 346 additions and 370 deletions
Cargo.lock (generated, 1 line changed)

@@ -2368,7 +2368,6 @@ version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
@@ -43,7 +43,7 @@ rustpython-literal = { git = "https://github.com/astral-sh/RustPython-Parser.git
rustpython-parser = { git = "https://github.com/astral-sh/RustPython-Parser.git", rev = "0dc8fdf52d146698c5bcf0b842fddc9e398ad8db", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
schemars = { version = "0.8.12" }
serde = { version = "1.0.152", features = ["derive"] }
serde_json = { version = "1.0.93", features = ["preserve_order"] }
serde_json = { version = "1.0.93" }
shellexpand = { version = "3.0.0" }
similar = { version = "2.2.1", features = ["inline"] }
smallvec = { version = "1.10.0" }
crates/ruff/resources/test/fixtures/jupyter/after_fix.ipynb (vendored, new file, 37 lines)

@@ -0,0 +1,37 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
crates/ruff/resources/test/fixtures/jupyter/before_fix.ipynb (vendored, new file, 38 lines)

@@ -0,0 +1,38 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import os\n",
"\n",
"math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": ["def foo():\n", " pass\n", "\n", "%timeit foo()"]
}

@@ -1,5 +1,6 @@
{
"cell_type": "markdown",
"id": "1",
"metadata": {},
"source": ["This is a markdown cell\n", "Some more content"]
}

@@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": ["def foo():\n", " pass"]
}

@@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": "%timeit print('hello world')"
}
@@ -1,6 +1,6 @@
use std::cmp::Ordering;
use std::fs::File;
use std::io::{BufReader, BufWriter, Cursor, Write};
use std::io::{BufReader, BufWriter, Write};
use std::iter;
use std::path::Path;

@@ -10,12 +10,12 @@ use serde::Serialize;
use serde_json::error::Category;

use ruff_diagnostics::Diagnostic;
use ruff_python_whitespace::NewlineWithTrailingNewline;
use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_text_size::{TextRange, TextSize};

use crate::autofix::source_map::{SourceMap, SourceMarker};
use crate::jupyter::index::JupyterIndex;
use crate::jupyter::{Cell, CellType, RawNotebook, SourceValue};
use crate::jupyter::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
use crate::rules::pycodestyle::rules::SyntaxError;
use crate::IOError;

@@ -34,9 +34,9 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
})?;
let code = notebook.content().to_string();
notebook.update_cell_content(&code);
let mut buffer = Cursor::new(Vec::new());
let mut buffer = BufWriter::new(Vec::new());
notebook.write_inner(&mut buffer)?;
Ok(String::from_utf8(buffer.into_inner())?)
Ok(String::from_utf8(buffer.into_inner()?)?)
}

/// Return `true` if the [`Path`] appears to be that of a jupyter notebook file (`.ipynb`).

@@ -49,18 +49,37 @@ pub fn is_jupyter_notebook(path: &Path) -> bool {
}

impl Cell {
/// Return the [`SourceValue`] of the cell.
fn source(&self) -> &SourceValue {
match self {
Cell::Code(cell) => &cell.source,
Cell::Markdown(cell) => &cell.source,
Cell::Raw(cell) => &cell.source,
}
}

/// Update the [`SourceValue`] of the cell.
fn set_source(&mut self, source: SourceValue) {
match self {
Cell::Code(cell) => cell.source = source,
Cell::Markdown(cell) => cell.source = source,
Cell::Raw(cell) => cell.source = source,
}
}

/// Return `true` if it's a valid code cell.
///
/// A valid code cell is a cell where the type is [`CellType::Code`] and the
/// A valid code cell is a cell where the cell type is [`Cell::Code`] and the
/// source doesn't contain a magic, shell or help command.
fn is_valid_code_cell(&self) -> bool {
if self.cell_type != CellType::Code {
return false;
}
let source = match self {
Cell::Code(cell) => &cell.source,
_ => return false,
};
// Ignore a cell if it contains a magic command. There could be valid
// Python code as well, but we'll ignore that for now.
// TODO(dhruvmanila): https://github.com/psf/black/blob/main/src/black/handle_ipynb_magics.py
!match &self.source {
!match source {
SourceValue::String(string) => string.lines().any(|line| {
MAGIC_PREFIX
.iter()
@@ -92,7 +111,7 @@ pub struct Notebook {
/// The offsets of each cell in the concatenated source code. This includes
/// the first and last character offsets as well.
cell_offsets: Vec<TextSize>,
/// The cell numbers of all valid code cells in the notebook.
/// The cell index of all valid code cells in the notebook.
valid_code_cells: Vec<u32>,
}

@@ -108,7 +127,7 @@ impl Notebook {
TextRange::default(),
)
})?);
let notebook: RawNotebook = match serde_json::from_reader(reader) {
let raw_notebook: RawNotebook = match serde_json::from_reader(reader) {
Ok(notebook) => notebook,
Err(err) => {
// Translate the error into a diagnostic

@@ -176,34 +195,34 @@ impl Notebook {
};

// v4 is what everybody uses
if notebook.nbformat != 4 {
if raw_notebook.nbformat != 4 {
// bail because we should have already failed at the json schema stage
return Err(Box::new(Diagnostic::new(
SyntaxError {
message: format!(
"Expected Jupyter Notebook format 4, found {}",
notebook.nbformat
raw_notebook.nbformat
),
},
TextRange::default(),
)));
}

let valid_code_cells = notebook
let valid_code_cells = raw_notebook
.cells
.iter()
.enumerate()
.filter(|(_, cell)| cell.is_valid_code_cell())
.map(|(pos, _)| u32::try_from(pos).unwrap())
.map(|(idx, _)| u32::try_from(idx).unwrap())
.collect::<Vec<_>>();

let mut contents = Vec::with_capacity(valid_code_cells.len());
let mut current_offset = TextSize::from(0);
let mut cell_offsets = Vec::with_capacity(notebook.cells.len());
let mut cell_offsets = Vec::with_capacity(valid_code_cells.len());
cell_offsets.push(TextSize::from(0));

for &pos in &valid_code_cells {
let cell_contents = match &notebook.cells[pos as usize].source {
for &idx in &valid_code_cells {
let cell_contents = match &raw_notebook.cells[idx as usize].source() {
SourceValue::String(string) => string.clone(),
SourceValue::StringArray(string_array) => string_array.join(""),
};

@@ -213,7 +232,7 @@ impl Notebook {
}

Ok(Self {
raw: notebook,
raw: raw_notebook,
index: OnceCell::new(),
// The additional newline at the end is to maintain consistency for
// all cells. These newlines will be removed before updating the

@@ -267,7 +286,7 @@ impl Notebook {
/// can happen only if the cell offsets were not updated before calling
/// this method or the offsets were updated incorrectly.
fn update_cell_content(&mut self, transformed: &str) {
for (&pos, (start, end)) in self
for (&idx, (start, end)) in self
.valid_code_cells
.iter()
.zip(self.cell_offsets.iter().tuple_windows::<(_, _)>())

@@ -275,22 +294,25 @@ impl Notebook {
let cell_content = transformed
.get(start.to_usize()..end.to_usize())
.unwrap_or_else(|| {
panic!("Transformed content out of bounds ({start:?}..{end:?}) for cell {pos}");
panic!(
"Transformed content out of bounds ({start:?}..{end:?}) for cell at {idx:?}"
);
});
self.raw.cells[pos as usize].source = SourceValue::String(
cell_content
self.raw.cells[idx as usize].set_source(SourceValue::StringArray(
UniversalNewlineIterator::from(
// We only need to strip the trailing newline which we added
// while concatenating the cell contents.
.strip_suffix('\n')
.unwrap_or(cell_content)
.to_string(),
);
cell_content.strip_suffix('\n').unwrap_or(cell_content),
)
.map(|line| line.as_full_str().to_string())
.collect::<Vec<_>>(),
));
}
}

/// Build and return the [`JupyterIndex`].
///
/// # Notes
/// ## Notes
///
/// Empty cells don't have any newlines, but there's a single visible line
/// in the UI. That single line needs to be accounted for.

@@ -317,8 +339,8 @@ impl Notebook {
let mut row_to_cell = vec![0];
let mut row_to_row_in_cell = vec![0];

for &pos in &self.valid_code_cells {
let line_count = match &self.raw.cells[pos as usize].source {
for &idx in &self.valid_code_cells {
let line_count = match &self.raw.cells[idx as usize].source() {
SourceValue::String(string) => {
if string.is_empty() {
1

@@ -336,7 +358,7 @@ impl Notebook {
}
}
};
row_to_cell.extend(iter::repeat(pos + 1).take(line_count as usize));
row_to_cell.extend(iter::repeat(idx + 1).take(line_count as usize));
row_to_row_in_cell.extend(1..=line_count);
}

@@ -390,7 +412,7 @@ impl Notebook {
// https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut ser = serde_json::Serializer::with_formatter(writer, formatter);
self.raw.serialize(&mut ser)?;
SortAlphabetically(&self.raw).serialize(&mut ser)?;
Ok(())
}

@@ -404,6 +426,7 @@ impl Notebook {

#[cfg(test)]
mod test {
use std::io::BufWriter;
use std::path::Path;

use anyhow::Result;

@@ -536,4 +559,21 @@ print("after empty cells")
assert_messages!(diagnostics, path, source_kind);
Ok(())
}

#[test]
fn test_json_consistency() -> Result<()> {
let path = "before_fix.ipynb".to_string();
let (_, source_kind) = test_notebook_path(
path,
Path::new("after_fix.ipynb"),
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
let mut writer = BufWriter::new(Vec::new());
source_kind.expect_jupyter().write_inner(&mut writer)?;
let actual = String::from_utf8(writer.into_inner()?)?;
let expected =
std::fs::read_to_string(test_resource_path("fixtures/jupyter/after_fix.ipynb"))?;
assert_eq!(actual, expected);
Ok(())
}
}
@@ -5,6 +5,7 @@
//! Jupyter Notebook v4.5 JSON schema.
//!
//! The following changes were made to the generated version:
//! * Only keep the required structs and enums.
//! * `Cell::id` is optional because it wasn't required <v4.5
//! * `#[serde(deny_unknown_fields)]` was added where the schema had
//! `"additionalProperties": false`

@@ -12,26 +13,84 @@
//! `"additionalProperties": true` as preparation for round-trip support.
//! * `#[serde(skip_serializing_none)]` was added to all structs where one or
//! more fields were optional to avoid serializing `null` values.
//! * `Output::data` & `Cell::attachments` were changed to `Value` because
//! the scheme had `patternProperties`.
//! * `Cell::execution_count` is a required property only for code cells, but
//! we serialize it for all cells. This is because we can't know if a cell is
//! a code cell or not without looking at the `cell_type` property, which
//! would require a custom serializer.

use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_with::skip_serializing_none;

fn sort_alphabetically<T: Serialize, S: serde::Serializer>(
value: &T,
serializer: S,
) -> Result<S::Ok, S::Error> {
let value = serde_json::to_value(value).map_err(serde::ser::Error::custom)?;
value.serialize(serializer)
}

/// This is used to serialize any value implementing [`Serialize`] alphabetically.
///
/// The reason for this is to maintain consistency in the generated JSON string,
/// which is useful for diffing. The default serializer keeps the order of the
/// fields as they are defined in the struct, which will not be consistent when
/// there are `extra` fields.
///
/// # Example
///
/// ```
/// use std::collections::BTreeMap;
///
/// use serde::Serialize;
///
/// use ruff::jupyter::SortAlphabetically;
///
/// #[derive(Serialize)]
/// struct MyStruct {
/// a: String,
/// #[serde(flatten)]
/// extra: BTreeMap<String, String>,
/// b: String,
/// }
///
/// let my_struct = MyStruct {
/// a: "a".to_string(),
/// extra: BTreeMap::from([
/// ("d".to_string(), "d".to_string()),
/// ("c".to_string(), "c".to_string()),
/// ]),
/// b: "b".to_string(),
/// };
///
/// let serialized = serde_json::to_string_pretty(&SortAlphabetically(&my_struct)).unwrap();
/// assert_eq!(
/// serialized,
/// r#"{
/// "a": "a",
/// "b": "b",
/// "c": "c",
/// "d": "d"
/// }"#
/// );
/// ```
#[derive(Serialize)]
pub struct SortAlphabetically<T: Serialize>(#[serde(serialize_with = "sort_alphabetically")] pub T);

/// The root of the JSON of a Jupyter Notebook
///
/// Generated by <https://app.quicktype.io/> from
/// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
/// Jupyter Notebook v4.5 JSON schema.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct RawNotebook {
/// Array of cells of the current notebook.
pub cells: Vec<Cell>,
/// Notebook root-level metadata.
pub metadata: JupyterNotebookMetadata,
pub metadata: RawNotebookMetadata,
/// Notebook format (major number). Incremented between backwards incompatible changes to the
/// notebook format.
pub nbformat: i64,
@@ -40,119 +99,73 @@ pub struct RawNotebook {
pub nbformat_minor: i64,
}

/// String identifying the type of cell.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "cell_type")]
pub enum Cell {
#[serde(rename = "code")]
Code(CodeCell),
#[serde(rename = "markdown")]
Markdown(MarkdownCell),
#[serde(rename = "raw")]
Raw(RawCell),
}

/// Notebook raw nbconvert cell.
///
/// Notebook markdown cell.
///
/// Notebook code cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Cell {
pub attachments: Option<HashMap<String, HashMap<String, Value>>>,
/// String identifying the type of cell.
pub cell_type: CellType,
pub struct RawCell {
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: CellMetadata,
pub metadata: Value,
pub source: SourceValue,
}

/// Notebook markdown cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MarkdownCell {
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
pub source: SourceValue,
}

/// Notebook code cell.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct CodeCell {
/// The code cell's prompt number. Will be null if the cell has not been run.
pub execution_count: Option<i64>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
/// Execution, display, or stream outputs.
pub outputs: Option<Vec<Output>>,
}

/// Cell-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct CellMetadata {
/// Raw cell metadata format for nbconvert.
pub format: Option<String>,
/// Official Jupyter Metadata for Raw Cells
///
/// Official Jupyter Metadata for Markdown Cells
///
/// Official Jupyter Metadata for Code Cells
pub jupyter: Option<HashMap<String, Option<Value>>>,
pub name: Option<String>,
pub tags: Option<Vec<String>>,
/// Whether the cell's output is collapsed/expanded.
pub collapsed: Option<bool>,
/// Execution time for the code in the cell. This tracks time at which messages are received
/// from iopub or shell channels
pub execution: Option<Execution>,
/// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
pub scrolled: Option<ScrolledUnion>,
/// Custom added: round-trip support
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
}

/// Execution time for the code in the cell. This tracks time at which messages are received
/// from iopub or shell channels
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Execution {
/// header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates
/// the time at which the kernel broadcasts an execute_input message to connected frontends
#[serde(rename = "iopub.execute_input")]
pub iopub_execute_input: Option<String>,
/// header.date (in ISO 8601 format) of iopub channel's kernel status message when the status
/// is 'busy'
#[serde(rename = "iopub.status.busy")]
pub iopub_status_busy: Option<String>,
/// header.date (in ISO 8601 format) of iopub channel's kernel status message when the status
/// is 'idle'. It indicates the time at which kernel finished processing the associated
/// request
#[serde(rename = "iopub.status.idle")]
pub iopub_status_idle: Option<String>,
/// header.date (in ISO 8601 format) of the shell channel's execute_reply message. It
/// indicates the time at which the execute_reply message was created
#[serde(rename = "shell.execute_reply")]
pub shell_execute_reply: Option<String>,
}

/// Result of executing a code cell.
///
/// Data displayed as a result of code cell execution.
///
/// Stream output from a code cell.
///
/// Output of an error that occurred during code cell execution.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Output {
pub data: Option<HashMap<String, Value>>,
/// A result's prompt number.
pub execution_count: Option<i64>,
pub metadata: Option<HashMap<String, Option<Value>>>,
/// Type of cell output.
pub output_type: OutputType,
/// The name of the stream (stdout, stderr).
pub name: Option<String>,
/// The stream's text output, represented as an array of strings.
pub text: Option<TextUnion>,
/// The name of the error.
pub ename: Option<String>,
/// The value, or message, of the error.
pub evalue: Option<String>,
/// The error's traceback, represented as an array of strings.
pub traceback: Option<Vec<String>>,
pub outputs: Vec<Value>,
pub source: SourceValue,
}

/// Notebook root-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct JupyterNotebookMetadata {
pub struct RawNotebookMetadata {
/// The author(s) of the notebook document
pub authors: Option<Vec<Option<Value>>>,
pub authors: Option<Value>,
/// Kernel information.
pub kernelspec: Option<Kernelspec>,
pub kernelspec: Option<Value>,
/// Kernel information.
pub language_info: Option<LanguageInfo>,
/// Original notebook format (major number) before converting the notebook between versions.

@@ -160,21 +173,9 @@ pub struct JupyterNotebookMetadata {
pub orig_nbformat: Option<i64>,
/// The title of the notebook document
pub title: Option<String>,
/// Custom added: round-trip support
/// For additional properties.
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
}

/// Kernel information.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Kernelspec {
/// Name to display in UI.
pub display_name: String,
/// Name of the kernel specification.
pub name: String,
/// Custom added: round-trip support
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
pub extra: BTreeMap<String, Value>,
}

/// Kernel information.

@@ -182,7 +183,7 @@ pub struct Kernelspec {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LanguageInfo {
/// The codemirror mode to use for code in this language.
pub codemirror_mode: Option<CodemirrorMode>,
pub codemirror_mode: Option<Value>,
/// The file extension for files in this language.
pub file_extension: Option<String>,
/// The mimetype corresponding to files in this language.

@@ -191,9 +192,9 @@ pub struct LanguageInfo {
pub name: String,
/// The pygments lexer to use for code in this language.
pub pygments_lexer: Option<String>,
/// Custom added: round-trip support
/// For additional properties.
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
pub extra: BTreeMap<String, Value>,
}

/// mimetype output (e.g. text/plain), represented as either an array of strings or a

@@ -208,62 +209,3 @@ pub enum SourceValue {
String(String),
StringArray(Vec<String>),
}

/// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum ScrolledUnion {
Bool(bool),
Enum(ScrolledEnum),
}

/// mimetype output (e.g. text/plain), represented as either an array of strings or a
/// string.
///
/// Contents of the cell, represented as an array of lines.
///
/// The stream's text output, represented as an array of strings.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum TextUnion {
String(String),
StringArray(Vec<String>),
}

/// The codemirror mode to use for code in this language.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum CodemirrorMode {
AnythingMap(HashMap<String, Option<Value>>),
String(String),
}

/// String identifying the type of cell.
#[derive(Debug, Serialize, Deserialize, PartialEq, Copy, Clone)]
pub enum CellType {
#[serde(rename = "code")]
Code,
#[serde(rename = "markdown")]
Markdown,
#[serde(rename = "raw")]
Raw,
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
pub enum ScrolledEnum {
#[serde(rename = "auto")]
Auto,
}

/// Type of cell output.
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
pub enum OutputType {
#[serde(rename = "display_data")]
DisplayData,
#[serde(rename = "error")]
Error,
#[serde(rename = "execute_result")]
ExecuteResult,
#[serde(rename = "stream")]
Stream,
}
@@ -5,38 +5,38 @@ expression: redact_fingerprint(&content)
[
{
"description": "(F401) `os` imported but unused",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"lines": {
"begin": 1,
"end": 1
}
}
},
"path": "fib.py"
},
"severity": "major"
},
{
"description": "(F841) Local variable `x` is assigned to but never used",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"lines": {
"begin": 6,
"end": 6
}
}
},
"path": "fib.py"
},
"severity": "major"
},
{
"description": "(F821) Undefined name `a`",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "undef.py",
"lines": {
"begin": 1,
"end": 1
}
}
},
"path": "undef.py"
},
"severity": "major"
}
]
@@ -5,79 +5,79 @@ expression: content
[
{
"code": "F401",
"message": "`os` imported but unused",
"end_location": {
"column": 10,
"row": 1
},
"filename": "fib.py",
"fix": {
"applicability": "Suggested",
"message": "Remove unused import: `os`",
"edits": [
{
"content": "",
"location": {
"row": 1,
"column": 1
},
"end_location": {
"row": 2,
"column": 1
}
}
]
"column": 1,
"row": 2
},
"location": {
"row": 1,
"column": 8
"column": 1,
"row": 1
}
}
],
"message": "Remove unused import: `os`"
},
"end_location": {
"row": 1,
"column": 10
"location": {
"column": 8,
"row": 1
},
"filename": "fib.py",
"message": "`os` imported but unused",
"noqa_row": 1
},
{
"code": "F841",
"message": "Local variable `x` is assigned to but never used",
"end_location": {
"column": 6,
"row": 6
},
"filename": "fib.py",
"fix": {
"applicability": "Suggested",
"message": "Remove assignment to unused variable `x`",
"edits": [
{
"content": "",
"location": {
"row": 6,
"column": 5
},
"end_location": {
"row": 6,
"column": 10
}
}
]
"column": 10,
"row": 6
},
"location": {
"row": 6,
"column": 5
"column": 5,
"row": 6
}
}
],
"message": "Remove assignment to unused variable `x`"
},
"end_location": {
"row": 6,
"column": 6
"location": {
"column": 5,
"row": 6
},
"filename": "fib.py",
"message": "Local variable `x` is assigned to but never used",
"noqa_row": 6
},
{
"code": "F821",
"message": "Undefined name `a`",
"fix": null,
"location": {
"row": 1,
"column": 4
},
"end_location": {
"row": 1,
"column": 5
"column": 5,
"row": 1
},
"filename": "undef.py",
"fix": null,
"location": {
"column": 4,
"row": 1
},
"message": "Undefined name `a`",
"noqa_row": 1
}
]
@@ -1,8 +1,8 @@
---
source: crates/ruff/src/message/jsonlines.rs
source: crates/ruff/src/message/json_lines.rs
expression: content
---
{"code":"F401","message":"`os` imported but unused","fix":{"applicability":"Suggested","message":"Remove unused import: `os`","edits":[{"content":"","location":{"row":1,"column":1},"end_location":{"row":2,"column":1}}]},"location":{"row":1,"column":8},"end_location":{"row":1,"column":10},"filename":"fib.py","noqa_row":1}
{"code":"F841","message":"Local variable `x` is assigned to but never used","fix":{"applicability":"Suggested","message":"Remove assignment to unused variable `x`","edits":[{"content":"","location":{"row":6,"column":5},"end_location":{"row":6,"column":10}}]},"location":{"row":6,"column":5},"end_location":{"row":6,"column":6},"filename":"fib.py","noqa_row":6}
{"code":"F821","message":"Undefined name `a`","fix":null,"location":{"row":1,"column":4},"end_location":{"row":1,"column":5},"filename":"undef.py","noqa_row":1}
{"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"Suggested","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1}
{"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"Suggested","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6}
{"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1}
@@ -236,93 +236,3 @@ with the relevant file contents, the `pyproject.toml` settings, and the following:
}
}
}

#[cfg(test)]
#[cfg(feature = "jupyter_notebook")]
mod test {
use std::path::PathBuf;
use std::str::FromStr;

use anyhow::Result;
use path_absolutize::Absolutize;

use ruff::logging::LogLevel;
use ruff::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
use ruff::settings::configuration::{Configuration, RuleSelection};
use ruff::settings::flags::FixMode;
use ruff::settings::flags::{Cache, Noqa};
use ruff::settings::types::SerializationFormat;
use ruff::settings::AllSettings;
use ruff::RuleSelector;

use crate::args::Overrides;
use crate::printer::{Flags, Printer};

use super::run;

#[test]
fn test_jupyter_notebook_integration() -> Result<()> {
let overrides: Overrides = Overrides {
select: Some(vec![
RuleSelector::from_str("B")?,
RuleSelector::from_str("F")?,
]),
..Default::default()
};

let mut configuration = Configuration::default();
configuration.rule_selections.push(RuleSelection {
select: Some(vec![
RuleSelector::from_str("B")?,
RuleSelector::from_str("F")?,
]),
..Default::default()
});

let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("ruff")
.join("resources")
.join("test")
.join("fixtures")
.join("jupyter");

let diagnostics = run(
&[root_path.join("valid.ipynb")],
&PyprojectConfig::new(
PyprojectDiscoveryStrategy::Fixed,
AllSettings::from_configuration(configuration, &root_path)?,
None,
),
&overrides,
Cache::Disabled,
Noqa::Enabled,
FixMode::Generate,
)?;

let printer = Printer::new(
SerializationFormat::Text,
LogLevel::Default,
FixMode::Generate,
Flags::SHOW_VIOLATIONS,
);
let mut writer: Vec<u8> = Vec::new();
// Mute the terminal color codes.
colored::control::set_override(false);
printer.write_once(&diagnostics, &mut writer)?;
// TODO(konstin): Set jupyter notebooks as none-fixable for now
// TODO(konstin): Make jupyter notebooks fixable
let expected = format!(
"{valid_ipynb}:cell 1:2:5: F841 [*] Local variable `x` is assigned to but never used
{valid_ipynb}:cell 3:1:24: B006 Do not use mutable data structures for argument defaults
Found 2 errors.
[*] 1 potentially fixable with the --fix option.
",
valid_ipynb = root_path.join("valid.ipynb").absolutize()?.display()
);

assert_eq!(expected, String::from_utf8(writer)?);

Ok(())
}
}
@@ -91,33 +91,33 @@ fn stdin_json() -> Result<()> {
r#"[
{{
"code": "F401",
"message": "`os` imported but unused",
"end_location": {{
"column": 10,
"row": 1
}},
"filename": "{file_path}",
"fix": {{
"applicability": "Automatic",
"message": "Remove unused import: `os`",
"edits": [
{{
"content": "",
"location": {{
"row": 1,
"column": 1
}},
"end_location": {{
"row": 2,
"column": 1
}}
}}
]
"column": 1,
"row": 2
}},
"location": {{
"row": 1,
"column": 8
"column": 1,
"row": 1
}}
}}
],
"message": "Remove unused import: `os`"
}},
"end_location": {{
"row": 1,
"column": 10
"location": {{
"column": 8,
"row": 1
}},
"filename": "{file_path}",
"message": "`os` imported but unused",
"noqa_row": 1
}}
]"#