Maintain consistency when deserializing to JSON (#5114)

## Summary

Maintain consistency when serializing a Jupyter notebook back to JSON. The
following changes were made:

1. Use a string array to store the source value, as that's the default
(`nbformat/v4/nbjson.py`, commit `5781720423`, L56–L57)
2. Remove unused structs and enums
3. Reorder the keys alphabetically, as that's the default
(`nbformat/v4/nbjson.py`, commit `5781720423`, L51)

### Side effect

Removing the `preserve_order` feature means that the keys in the JSON
output (`--format json`) will appear in alphabetical order. This is
because the value is represented using `serde_json::Value`, whose object
variant is backed by a `BTreeMap` when `preserve_order` is disabled, so
entries are sorted by their string keys. For posterity: if this turns out
not to be ideal, we could instead define a struct representing the JSON
object, in which case the order of the struct fields would determine the
order of keys in the JSON string.

## Test Plan

Add a test case asserting the exact raw JSON string produced when writing a notebook back out.
This commit is contained in:
Dhruv Manilawala 2023-06-19 23:47:56 +05:30 committed by GitHub
parent 94abf7f088
commit 48f4f2d63d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 346 additions and 370 deletions

1
Cargo.lock generated
View file

@ -2368,7 +2368,6 @@ version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",

View file

@ -43,7 +43,7 @@ rustpython-literal = { git = "https://github.com/astral-sh/RustPython-Parser.git
rustpython-parser = { git = "https://github.com/astral-sh/RustPython-Parser.git", rev = "0dc8fdf52d146698c5bcf0b842fddc9e398ad8db", default-features = false, features = ["full-lexer", "all-nodes-with-ranges"] }
schemars = { version = "0.8.12" }
serde = { version = "1.0.152", features = ["derive"] }
serde_json = { version = "1.0.93", features = ["preserve_order"] }
serde_json = { version = "1.0.93" }
shellexpand = { version = "3.0.0" }
similar = { version = "2.2.1", features = ["inline"] }
smallvec = { version = "1.10.0" }

View file

@ -0,0 +1,37 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,38 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import os\n",
"\n",
"math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": ["def foo():\n", " pass\n", "\n", "%timeit foo()"]
}

View file

@ -1,5 +1,6 @@
{
"cell_type": "markdown",
"id": "1",
"metadata": {},
"source": ["This is a markdown cell\n", "Some more content"]
}

View file

@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": ["def foo():\n", " pass"]
}

View file

@ -1,5 +1,8 @@
{
"execution_count": null,
"cell_type": "code",
"id": "1",
"metadata": {},
"outputs": [],
"source": "%timeit print('hello world')"
}

View file

@ -1,6 +1,6 @@
use std::cmp::Ordering;
use std::fs::File;
use std::io::{BufReader, BufWriter, Cursor, Write};
use std::io::{BufReader, BufWriter, Write};
use std::iter;
use std::path::Path;
@ -10,12 +10,12 @@ use serde::Serialize;
use serde_json::error::Category;
use ruff_diagnostics::Diagnostic;
use ruff_python_whitespace::NewlineWithTrailingNewline;
use ruff_python_whitespace::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_text_size::{TextRange, TextSize};
use crate::autofix::source_map::{SourceMap, SourceMarker};
use crate::jupyter::index::JupyterIndex;
use crate::jupyter::{Cell, CellType, RawNotebook, SourceValue};
use crate::jupyter::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
use crate::rules::pycodestyle::rules::SyntaxError;
use crate::IOError;
@ -34,9 +34,9 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
})?;
let code = notebook.content().to_string();
notebook.update_cell_content(&code);
let mut buffer = Cursor::new(Vec::new());
let mut buffer = BufWriter::new(Vec::new());
notebook.write_inner(&mut buffer)?;
Ok(String::from_utf8(buffer.into_inner())?)
Ok(String::from_utf8(buffer.into_inner()?)?)
}
/// Return `true` if the [`Path`] appears to be that of a jupyter notebook file (`.ipynb`).
@ -49,18 +49,37 @@ pub fn is_jupyter_notebook(path: &Path) -> bool {
}
impl Cell {
/// Return the [`SourceValue`] of the cell.
fn source(&self) -> &SourceValue {
match self {
Cell::Code(cell) => &cell.source,
Cell::Markdown(cell) => &cell.source,
Cell::Raw(cell) => &cell.source,
}
}
/// Update the [`SourceValue`] of the cell.
fn set_source(&mut self, source: SourceValue) {
match self {
Cell::Code(cell) => cell.source = source,
Cell::Markdown(cell) => cell.source = source,
Cell::Raw(cell) => cell.source = source,
}
}
/// Return `true` if it's a valid code cell.
///
/// A valid code cell is a cell where the type is [`CellType::Code`] and the
/// A valid code cell is a cell where the cell type is [`Cell::Code`] and the
/// source doesn't contain a magic, shell or help command.
fn is_valid_code_cell(&self) -> bool {
if self.cell_type != CellType::Code {
return false;
}
let source = match self {
Cell::Code(cell) => &cell.source,
_ => return false,
};
// Ignore a cell if it contains a magic command. There could be valid
// Python code as well, but we'll ignore that for now.
// TODO(dhruvmanila): https://github.com/psf/black/blob/main/src/black/handle_ipynb_magics.py
!match &self.source {
!match source {
SourceValue::String(string) => string.lines().any(|line| {
MAGIC_PREFIX
.iter()
@ -92,7 +111,7 @@ pub struct Notebook {
/// The offsets of each cell in the concatenated source code. This includes
/// the first and last character offsets as well.
cell_offsets: Vec<TextSize>,
/// The cell numbers of all valid code cells in the notebook.
/// The cell index of all valid code cells in the notebook.
valid_code_cells: Vec<u32>,
}
@ -108,7 +127,7 @@ impl Notebook {
TextRange::default(),
)
})?);
let notebook: RawNotebook = match serde_json::from_reader(reader) {
let raw_notebook: RawNotebook = match serde_json::from_reader(reader) {
Ok(notebook) => notebook,
Err(err) => {
// Translate the error into a diagnostic
@ -176,34 +195,34 @@ impl Notebook {
};
// v4 is what everybody uses
if notebook.nbformat != 4 {
if raw_notebook.nbformat != 4 {
// bail because we should have already failed at the json schema stage
return Err(Box::new(Diagnostic::new(
SyntaxError {
message: format!(
"Expected Jupyter Notebook format 4, found {}",
notebook.nbformat
raw_notebook.nbformat
),
},
TextRange::default(),
)));
}
let valid_code_cells = notebook
let valid_code_cells = raw_notebook
.cells
.iter()
.enumerate()
.filter(|(_, cell)| cell.is_valid_code_cell())
.map(|(pos, _)| u32::try_from(pos).unwrap())
.map(|(idx, _)| u32::try_from(idx).unwrap())
.collect::<Vec<_>>();
let mut contents = Vec::with_capacity(valid_code_cells.len());
let mut current_offset = TextSize::from(0);
let mut cell_offsets = Vec::with_capacity(notebook.cells.len());
let mut cell_offsets = Vec::with_capacity(valid_code_cells.len());
cell_offsets.push(TextSize::from(0));
for &pos in &valid_code_cells {
let cell_contents = match &notebook.cells[pos as usize].source {
for &idx in &valid_code_cells {
let cell_contents = match &raw_notebook.cells[idx as usize].source() {
SourceValue::String(string) => string.clone(),
SourceValue::StringArray(string_array) => string_array.join(""),
};
@ -213,7 +232,7 @@ impl Notebook {
}
Ok(Self {
raw: notebook,
raw: raw_notebook,
index: OnceCell::new(),
// The additional newline at the end is to maintain consistency for
// all cells. These newlines will be removed before updating the
@ -267,7 +286,7 @@ impl Notebook {
/// can happen only if the cell offsets were not updated before calling
/// this method or the offsets were updated incorrectly.
fn update_cell_content(&mut self, transformed: &str) {
for (&pos, (start, end)) in self
for (&idx, (start, end)) in self
.valid_code_cells
.iter()
.zip(self.cell_offsets.iter().tuple_windows::<(_, _)>())
@ -275,22 +294,25 @@ impl Notebook {
let cell_content = transformed
.get(start.to_usize()..end.to_usize())
.unwrap_or_else(|| {
panic!("Transformed content out of bounds ({start:?}..{end:?}) for cell {pos}");
panic!(
"Transformed content out of bounds ({start:?}..{end:?}) for cell at {idx:?}"
);
});
self.raw.cells[pos as usize].source = SourceValue::String(
cell_content
self.raw.cells[idx as usize].set_source(SourceValue::StringArray(
UniversalNewlineIterator::from(
// We only need to strip the trailing newline which we added
// while concatenating the cell contents.
.strip_suffix('\n')
.unwrap_or(cell_content)
.to_string(),
);
cell_content.strip_suffix('\n').unwrap_or(cell_content),
)
.map(|line| line.as_full_str().to_string())
.collect::<Vec<_>>(),
));
}
}
/// Build and return the [`JupyterIndex`].
///
/// # Notes
/// ## Notes
///
/// Empty cells don't have any newlines, but there's a single visible line
/// in the UI. That single line needs to be accounted for.
@ -317,8 +339,8 @@ impl Notebook {
let mut row_to_cell = vec![0];
let mut row_to_row_in_cell = vec![0];
for &pos in &self.valid_code_cells {
let line_count = match &self.raw.cells[pos as usize].source {
for &idx in &self.valid_code_cells {
let line_count = match &self.raw.cells[idx as usize].source() {
SourceValue::String(string) => {
if string.is_empty() {
1
@ -336,7 +358,7 @@ impl Notebook {
}
}
};
row_to_cell.extend(iter::repeat(pos + 1).take(line_count as usize));
row_to_cell.extend(iter::repeat(idx + 1).take(line_count as usize));
row_to_row_in_cell.extend(1..=line_count);
}
@ -390,7 +412,7 @@ impl Notebook {
// https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut ser = serde_json::Serializer::with_formatter(writer, formatter);
self.raw.serialize(&mut ser)?;
SortAlphabetically(&self.raw).serialize(&mut ser)?;
Ok(())
}
@ -404,6 +426,7 @@ impl Notebook {
#[cfg(test)]
mod test {
use std::io::BufWriter;
use std::path::Path;
use anyhow::Result;
@ -536,4 +559,21 @@ print("after empty cells")
assert_messages!(diagnostics, path, source_kind);
Ok(())
}
#[test]
fn test_json_consistency() -> Result<()> {
let path = "before_fix.ipynb".to_string();
let (_, source_kind) = test_notebook_path(
path,
Path::new("after_fix.ipynb"),
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
let mut writer = BufWriter::new(Vec::new());
source_kind.expect_jupyter().write_inner(&mut writer)?;
let actual = String::from_utf8(writer.into_inner()?)?;
let expected =
std::fs::read_to_string(test_resource_path("fixtures/jupyter/after_fix.ipynb"))?;
assert_eq!(actual, expected);
Ok(())
}
}

View file

@ -5,6 +5,7 @@
//! Jupyter Notebook v4.5 JSON schema.
//!
//! The following changes were made to the generated version:
//! * Only keep the required structs and enums.
//! * `Cell::id` is optional because it wasn't required <v4.5
//! * `#[serde(deny_unknown_fields)]` was added where the schema had
//! `"additionalProperties": false`
@ -12,26 +13,84 @@
//! `"additionalProperties": true` as preparation for round-trip support.
//! * `#[serde(skip_serializing_none)]` was added to all structs where one or
//! more fields were optional to avoid serializing `null` values.
//! * `Output::data` & `Cell::attachments` were changed to `Value` because
//! the scheme had `patternProperties`.
//! * `Cell::execution_count` is a required property only for code cells, but
//! we serialize it for all cells. This is because we can't know if a cell is
//! a code cell or not without looking at the `cell_type` property, which
//! would require a custom serializer.
use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_with::skip_serializing_none;
/// Serialize `value` with its map keys in alphabetical order.
///
/// Round-trips `value` through `serde_json::Value` first: with the
/// `preserve_order` feature disabled, `serde_json`'s object type is backed
/// by a `BTreeMap`, so re-serializing the intermediate `Value` emits keys
/// sorted by their string representation.
///
/// # Errors
///
/// Returns a custom serializer error if `value` cannot be converted into a
/// `serde_json::Value`.
fn sort_alphabetically<T: Serialize, S: serde::Serializer>(
    value: &T,
    serializer: S,
) -> Result<S::Ok, S::Error> {
    // The intermediate `Value` is what performs the sorting; serializing it
    // directly afterwards preserves that alphabetical key order.
    let value = serde_json::to_value(value).map_err(serde::ser::Error::custom)?;
    value.serialize(serializer)
}
/// This is used to serialize any value implementing [`Serialize`] alphabetically.
///
/// The reason for this is to maintain consistency in the generated JSON string,
/// which is useful for diffing. The default serializer keeps the order of the
/// fields as they are defined in the struct, which will not be consistent when
/// there are `extra` fields.
///
/// # Example
///
/// ```
/// use std::collections::BTreeMap;
///
/// use serde::Serialize;
///
/// use ruff::jupyter::SortAlphabetically;
///
/// #[derive(Serialize)]
/// struct MyStruct {
/// a: String,
/// #[serde(flatten)]
/// extra: BTreeMap<String, String>,
/// b: String,
/// }
///
/// let my_struct = MyStruct {
/// a: "a".to_string(),
/// extra: BTreeMap::from([
/// ("d".to_string(), "d".to_string()),
/// ("c".to_string(), "c".to_string()),
/// ]),
/// b: "b".to_string(),
/// };
///
/// let serialized = serde_json::to_string_pretty(&SortAlphabetically(&my_struct)).unwrap();
/// assert_eq!(
/// serialized,
/// r#"{
/// "a": "a",
/// "b": "b",
/// "c": "c",
/// "d": "d"
/// }"#
/// );
/// ```
#[derive(Serialize)]
pub struct SortAlphabetically<T: Serialize>(#[serde(serialize_with = "sort_alphabetically")] pub T);
/// The root of the JSON of a Jupyter Notebook
///
/// Generated by <https://app.quicktype.io/> from
/// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
/// Jupyter Notebook v4.5 JSON schema.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct RawNotebook {
/// Array of cells of the current notebook.
pub cells: Vec<Cell>,
/// Notebook root-level metadata.
pub metadata: JupyterNotebookMetadata,
pub metadata: RawNotebookMetadata,
/// Notebook format (major number). Incremented between backwards incompatible changes to the
/// notebook format.
pub nbformat: i64,
@ -40,119 +99,73 @@ pub struct RawNotebook {
pub nbformat_minor: i64,
}
/// String identifying the type of cell.
/// A cell of a Jupyter notebook.
///
/// (De)serialization dispatches on the JSON `cell_type` field via serde's
/// internally-tagged enum representation, so each variant maps to one of
/// the `"code"` / `"markdown"` / `"raw"` cell types.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "cell_type")]
pub enum Cell {
    /// A code cell (`"cell_type": "code"`).
    #[serde(rename = "code")]
    Code(CodeCell),
    /// A markdown cell (`"cell_type": "markdown"`).
    #[serde(rename = "markdown")]
    Markdown(MarkdownCell),
    /// A raw cell (`"cell_type": "raw"`).
    #[serde(rename = "raw")]
    Raw(RawCell),
}
/// Notebook raw nbconvert cell.
///
/// Notebook markdown cell.
///
/// Notebook code cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Cell {
pub attachments: Option<HashMap<String, HashMap<String, Value>>>,
/// String identifying the type of cell.
pub cell_type: CellType,
pub struct RawCell {
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: CellMetadata,
pub metadata: Value,
pub source: SourceValue,
}
/// Notebook markdown cell.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct MarkdownCell {
pub attachments: Option<Value>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
pub source: SourceValue,
}
/// Notebook code cell.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct CodeCell {
/// The code cell's prompt number. Will be null if the cell has not been run.
pub execution_count: Option<i64>,
/// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
/// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
/// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
pub id: Option<String>,
/// Cell-level metadata.
pub metadata: Value,
/// Execution, display, or stream outputs.
pub outputs: Option<Vec<Output>>,
}
/// Cell-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct CellMetadata {
/// Raw cell metadata format for nbconvert.
pub format: Option<String>,
/// Official Jupyter Metadata for Raw Cells
///
/// Official Jupyter Metadata for Markdown Cells
///
/// Official Jupyter Metadata for Code Cells
pub jupyter: Option<HashMap<String, Option<Value>>>,
pub name: Option<String>,
pub tags: Option<Vec<String>>,
/// Whether the cell's output is collapsed/expanded.
pub collapsed: Option<bool>,
/// Execution time for the code in the cell. This tracks time at which messages are received
/// from iopub or shell channels
pub execution: Option<Execution>,
/// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
pub scrolled: Option<ScrolledUnion>,
/// Custom added: round-trip support
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
}
/// Execution time for the code in the cell. This tracks time at which messages are received
/// from iopub or shell channels
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Execution {
/// header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates
/// the time at which the kernel broadcasts an execute_input message to connected frontends
#[serde(rename = "iopub.execute_input")]
pub iopub_execute_input: Option<String>,
/// header.date (in ISO 8601 format) of iopub channel's kernel status message when the status
/// is 'busy'
#[serde(rename = "iopub.status.busy")]
pub iopub_status_busy: Option<String>,
/// header.date (in ISO 8601 format) of iopub channel's kernel status message when the status
/// is 'idle'. It indicates the time at which kernel finished processing the associated
/// request
#[serde(rename = "iopub.status.idle")]
pub iopub_status_idle: Option<String>,
/// header.date (in ISO 8601 format) of the shell channel's execute_reply message. It
/// indicates the time at which the execute_reply message was created
#[serde(rename = "shell.execute_reply")]
pub shell_execute_reply: Option<String>,
}
/// Result of executing a code cell.
///
/// Data displayed as a result of code cell execution.
///
/// Stream output from a code cell.
///
/// Output of an error that occurred during code cell execution.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Output {
pub data: Option<HashMap<String, Value>>,
/// A result's prompt number.
pub execution_count: Option<i64>,
pub metadata: Option<HashMap<String, Option<Value>>>,
/// Type of cell output.
pub output_type: OutputType,
/// The name of the stream (stdout, stderr).
pub name: Option<String>,
/// The stream's text output, represented as an array of strings.
pub text: Option<TextUnion>,
/// The name of the error.
pub ename: Option<String>,
/// The value, or message, of the error.
pub evalue: Option<String>,
/// The error's traceback, represented as an array of strings.
pub traceback: Option<Vec<String>>,
pub outputs: Vec<Value>,
pub source: SourceValue,
}
/// Notebook root-level metadata.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct JupyterNotebookMetadata {
pub struct RawNotebookMetadata {
/// The author(s) of the notebook document
pub authors: Option<Vec<Option<Value>>>,
pub authors: Option<Value>,
/// Kernel information.
pub kernelspec: Option<Kernelspec>,
pub kernelspec: Option<Value>,
/// Kernel information.
pub language_info: Option<LanguageInfo>,
/// Original notebook format (major number) before converting the notebook between versions.
@ -160,21 +173,9 @@ pub struct JupyterNotebookMetadata {
pub orig_nbformat: Option<i64>,
/// The title of the notebook document
pub title: Option<String>,
/// Custom added: round-trip support
/// For additional properties.
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
}
/// Kernel information.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Kernelspec {
/// Name to display in UI.
pub display_name: String,
/// Name of the kernel specification.
pub name: String,
/// Custom added: round-trip support
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
pub extra: BTreeMap<String, Value>,
}
/// Kernel information.
@ -182,7 +183,7 @@ pub struct Kernelspec {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LanguageInfo {
/// The codemirror mode to use for code in this language.
pub codemirror_mode: Option<CodemirrorMode>,
pub codemirror_mode: Option<Value>,
/// The file extension for files in this language.
pub file_extension: Option<String>,
/// The mimetype corresponding to files in this language.
@ -191,9 +192,9 @@ pub struct LanguageInfo {
pub name: String,
/// The pygments lexer to use for code in this language.
pub pygments_lexer: Option<String>,
/// Custom added: round-trip support
/// For additional properties.
#[serde(flatten)]
pub other: BTreeMap<String, Value>,
pub extra: BTreeMap<String, Value>,
}
/// mimetype output (e.g. text/plain), represented as either an array of strings or a
@ -208,62 +209,3 @@ pub enum SourceValue {
String(String),
StringArray(Vec<String>),
}
/// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum ScrolledUnion {
Bool(bool),
Enum(ScrolledEnum),
}
/// mimetype output (e.g. text/plain), represented as either an array of strings or a
/// string.
///
/// Contents of the cell, represented as an array of lines.
///
/// The stream's text output, represented as an array of strings.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum TextUnion {
String(String),
StringArray(Vec<String>),
}
/// The codemirror mode to use for code in this language.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum CodemirrorMode {
AnythingMap(HashMap<String, Option<Value>>),
String(String),
}
/// String identifying the type of cell.
#[derive(Debug, Serialize, Deserialize, PartialEq, Copy, Clone)]
pub enum CellType {
#[serde(rename = "code")]
Code,
#[serde(rename = "markdown")]
Markdown,
#[serde(rename = "raw")]
Raw,
}
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
pub enum ScrolledEnum {
#[serde(rename = "auto")]
Auto,
}
/// Type of cell output.
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq)]
pub enum OutputType {
#[serde(rename = "display_data")]
DisplayData,
#[serde(rename = "error")]
Error,
#[serde(rename = "execute_result")]
ExecuteResult,
#[serde(rename = "stream")]
Stream,
}

View file

@ -5,38 +5,38 @@ expression: redact_fingerprint(&content)
[
{
"description": "(F401) `os` imported but unused",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"lines": {
"begin": 1,
"end": 1
}
}
},
"path": "fib.py"
},
"severity": "major"
},
{
"description": "(F841) Local variable `x` is assigned to but never used",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"lines": {
"begin": 6,
"end": 6
}
}
},
"path": "fib.py"
},
"severity": "major"
},
{
"description": "(F821) Undefined name `a`",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "undef.py",
"lines": {
"begin": 1,
"end": 1
}
}
},
"path": "undef.py"
},
"severity": "major"
}
]

View file

@ -5,79 +5,79 @@ expression: content
[
{
"code": "F401",
"message": "`os` imported but unused",
"end_location": {
"column": 10,
"row": 1
},
"filename": "fib.py",
"fix": {
"applicability": "Suggested",
"message": "Remove unused import: `os`",
"edits": [
{
"content": "",
"location": {
"row": 1,
"column": 1
},
"end_location": {
"row": 2,
"column": 1
"column": 1,
"row": 2
},
"location": {
"column": 1,
"row": 1
}
}
]
],
"message": "Remove unused import: `os`"
},
"location": {
"row": 1,
"column": 8
"column": 8,
"row": 1
},
"end_location": {
"row": 1,
"column": 10
},
"filename": "fib.py",
"message": "`os` imported but unused",
"noqa_row": 1
},
{
"code": "F841",
"message": "Local variable `x` is assigned to but never used",
"end_location": {
"column": 6,
"row": 6
},
"filename": "fib.py",
"fix": {
"applicability": "Suggested",
"message": "Remove assignment to unused variable `x`",
"edits": [
{
"content": "",
"location": {
"row": 6,
"column": 5
},
"end_location": {
"row": 6,
"column": 10
"column": 10,
"row": 6
},
"location": {
"column": 5,
"row": 6
}
}
]
],
"message": "Remove assignment to unused variable `x`"
},
"location": {
"row": 6,
"column": 5
"column": 5,
"row": 6
},
"end_location": {
"row": 6,
"column": 6
},
"filename": "fib.py",
"message": "Local variable `x` is assigned to but never used",
"noqa_row": 6
},
{
"code": "F821",
"message": "Undefined name `a`",
"fix": null,
"location": {
"row": 1,
"column": 4
},
"end_location": {
"row": 1,
"column": 5
"column": 5,
"row": 1
},
"filename": "undef.py",
"fix": null,
"location": {
"column": 4,
"row": 1
},
"message": "Undefined name `a`",
"noqa_row": 1
}
]

View file

@ -1,8 +1,8 @@
---
source: crates/ruff/src/message/jsonlines.rs
source: crates/ruff/src/message/json_lines.rs
expression: content
---
{"code":"F401","message":"`os` imported but unused","fix":{"applicability":"Suggested","message":"Remove unused import: `os`","edits":[{"content":"","location":{"row":1,"column":1},"end_location":{"row":2,"column":1}}]},"location":{"row":1,"column":8},"end_location":{"row":1,"column":10},"filename":"fib.py","noqa_row":1}
{"code":"F841","message":"Local variable `x` is assigned to but never used","fix":{"applicability":"Suggested","message":"Remove assignment to unused variable `x`","edits":[{"content":"","location":{"row":6,"column":5},"end_location":{"row":6,"column":10}}]},"location":{"row":6,"column":5},"end_location":{"row":6,"column":6},"filename":"fib.py","noqa_row":6}
{"code":"F821","message":"Undefined name `a`","fix":null,"location":{"row":1,"column":4},"end_location":{"row":1,"column":5},"filename":"undef.py","noqa_row":1}
{"code":"F401","end_location":{"column":10,"row":1},"filename":"fib.py","fix":{"applicability":"Suggested","edits":[{"content":"","end_location":{"column":1,"row":2},"location":{"column":1,"row":1}}],"message":"Remove unused import: `os`"},"location":{"column":8,"row":1},"message":"`os` imported but unused","noqa_row":1}
{"code":"F841","end_location":{"column":6,"row":6},"filename":"fib.py","fix":{"applicability":"Suggested","edits":[{"content":"","end_location":{"column":10,"row":6},"location":{"column":5,"row":6}}],"message":"Remove assignment to unused variable `x`"},"location":{"column":5,"row":6},"message":"Local variable `x` is assigned to but never used","noqa_row":6}
{"code":"F821","end_location":{"column":5,"row":1},"filename":"undef.py","fix":null,"location":{"column":4,"row":1},"message":"Undefined name `a`","noqa_row":1}

View file

@ -236,93 +236,3 @@ with the relevant file contents, the `pyproject.toml` settings, and the followin
}
}
}
#[cfg(test)]
#[cfg(feature = "jupyter_notebook")]
mod test {
use std::path::PathBuf;
use std::str::FromStr;
use anyhow::Result;
use path_absolutize::Absolutize;
use ruff::logging::LogLevel;
use ruff::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
use ruff::settings::configuration::{Configuration, RuleSelection};
use ruff::settings::flags::FixMode;
use ruff::settings::flags::{Cache, Noqa};
use ruff::settings::types::SerializationFormat;
use ruff::settings::AllSettings;
use ruff::RuleSelector;
use crate::args::Overrides;
use crate::printer::{Flags, Printer};
use super::run;
#[test]
fn test_jupyter_notebook_integration() -> Result<()> {
let overrides: Overrides = Overrides {
select: Some(vec![
RuleSelector::from_str("B")?,
RuleSelector::from_str("F")?,
]),
..Default::default()
};
let mut configuration = Configuration::default();
configuration.rule_selections.push(RuleSelection {
select: Some(vec![
RuleSelector::from_str("B")?,
RuleSelector::from_str("F")?,
]),
..Default::default()
});
let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("ruff")
.join("resources")
.join("test")
.join("fixtures")
.join("jupyter");
let diagnostics = run(
&[root_path.join("valid.ipynb")],
&PyprojectConfig::new(
PyprojectDiscoveryStrategy::Fixed,
AllSettings::from_configuration(configuration, &root_path)?,
None,
),
&overrides,
Cache::Disabled,
Noqa::Enabled,
FixMode::Generate,
)?;
let printer = Printer::new(
SerializationFormat::Text,
LogLevel::Default,
FixMode::Generate,
Flags::SHOW_VIOLATIONS,
);
let mut writer: Vec<u8> = Vec::new();
// Mute the terminal color codes.
colored::control::set_override(false);
printer.write_once(&diagnostics, &mut writer)?;
// TODO(konstin): Set jupyter notebooks as none-fixable for now
// TODO(konstin): Make jupyter notebooks fixable
let expected = format!(
"{valid_ipynb}:cell 1:2:5: F841 [*] Local variable `x` is assigned to but never used
{valid_ipynb}:cell 3:1:24: B006 Do not use mutable data structures for argument defaults
Found 2 errors.
[*] 1 potentially fixable with the --fix option.
",
valid_ipynb = root_path.join("valid.ipynb").absolutize()?.display()
);
assert_eq!(expected, String::from_utf8(writer)?);
Ok(())
}
}

View file

@ -91,33 +91,33 @@ fn stdin_json() -> Result<()> {
r#"[
{{
"code": "F401",
"message": "`os` imported but unused",
"end_location": {{
"column": 10,
"row": 1
}},
"filename": "{file_path}",
"fix": {{
"applicability": "Automatic",
"message": "Remove unused import: `os`",
"edits": [
{{
"content": "",
"location": {{
"row": 1,
"column": 1
}},
"end_location": {{
"row": 2,
"column": 1
"column": 1,
"row": 2
}},
"location": {{
"column": 1,
"row": 1
}}
}}
]
],
"message": "Remove unused import: `os`"
}},
"location": {{
"row": 1,
"column": 8
"column": 8,
"row": 1
}},
"end_location": {{
"row": 1,
"column": 10
}},
"filename": "{file_path}",
"message": "`os` imported but unused",
"noqa_row": 1
}}
]"#