Ability to perform integration test on Jupyter notebooks (#5076)

## Summary

Ability to perform integration test on Jupyter notebooks

Part of #1218

## Test Plan

`cargo test`
This commit is contained in:
Dhruv Manilawala 2023-06-15 08:04:27 +05:30 committed by GitHub
parent ed8113267c
commit 097823b56d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 286 additions and 34 deletions

View file

@ -0,0 +1,51 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0c7535f6-43cb-423f-bfe1-d263b8f55da0",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import random\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c066fa1a-5682-47af-8c17-5afec3cf4ad0",
"metadata": {},
"outputs": [],
"source": [
"from typing import Any\n",
"import collections\n",
"# Newline should be added here\n",
"def foo():\n",
" pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,53 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "663ba955-baca-4f34-9ebb-840d2573ae3f",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import random\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0adfe23-8aea-47e9-bf67-d856cfcb96ea",
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"from typing import Any\n",
"\n",
"\n",
"# Newline should be added here\n",
"def foo():\n",
" pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -75,7 +75,7 @@ impl Cell {
}
}
#[derive(Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq)]
pub struct Notebook {
/// Python source code of the notebook.
///
@ -414,8 +414,9 @@ mod test {
use crate::jupyter::is_jupyter_notebook;
use crate::jupyter::schema::Cell;
use crate::jupyter::Notebook;
use crate::test::test_resource_path;
use crate::registry::Rule;
use crate::test::{test_notebook_path, test_resource_path};
use crate::{assert_messages, settings};
/// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
@ -523,4 +524,16 @@ print("after empty cells")
]
);
}
#[test]
fn test_import_sorting() -> Result<()> {
let path = "isort.ipynb".to_string();
let (diagnostics, source_kind) = test_notebook_path(
&path,
Path::new("isort_expected.ipynb"),
&settings::Settings::for_rule(Rule::UnsortedImports),
)?;
assert_messages!(diagnostics, path, source_kind);
Ok(())
}
}

View file

@ -26,7 +26,7 @@ use serde_with::skip_serializing_none;
/// Generated by <https://app.quicktype.io/> from
/// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
/// Jupyter Notebook v4.5 JSON schema.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct RawNotebook {
/// Array of cells of the current notebook.
pub cells: Vec<Cell>,
@ -46,7 +46,7 @@ pub struct RawNotebook {
///
/// Notebook code cell.
#[skip_serializing_none]
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Cell {
pub attachments: Option<HashMap<String, HashMap<String, Value>>>,
@ -67,7 +67,7 @@ pub struct Cell {
/// Cell-level metadata.
#[skip_serializing_none]
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct CellMetadata {
/// Raw cell metadata format for nbconvert.
pub format: Option<String>,
@ -94,7 +94,7 @@ pub struct CellMetadata {
/// Execution time for the code in the cell. This tracks time at which messages are received
/// from iopub or shell channels
#[skip_serializing_none]
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Execution {
/// header.date (in ISO 8601 format) of iopub channel's execute_input message. It indicates
@ -124,7 +124,7 @@ pub struct Execution {
///
/// Output of an error that occurred during code cell execution.
#[skip_serializing_none]
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Output {
pub data: Option<HashMap<String, Value>>,
@ -147,7 +147,7 @@ pub struct Output {
/// Notebook root-level metadata.
#[skip_serializing_none]
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct JupyterNotebookMetadata {
/// The author(s) of the notebook document
pub authors: Option<Vec<Option<Value>>>,
@ -166,7 +166,7 @@ pub struct JupyterNotebookMetadata {
}
/// Kernel information.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Kernelspec {
/// Name to display in UI.
pub display_name: String,
@ -179,7 +179,7 @@ pub struct Kernelspec {
/// Kernel information.
#[skip_serializing_none]
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LanguageInfo {
/// The codemirror mode to use for code in this language.
pub codemirror_mode: Option<CodemirrorMode>,
@ -202,7 +202,7 @@ pub struct LanguageInfo {
/// Contents of the cell, represented as an array of lines.
///
/// The stream's text output, represented as an array of strings.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum SourceValue {
String(String),
@ -210,7 +210,7 @@ pub enum SourceValue {
}
/// Whether the cell's output is scrolled, unscrolled, or autoscrolled.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum ScrolledUnion {
Bool(bool),
@ -223,7 +223,7 @@ pub enum ScrolledUnion {
/// Contents of the cell, represented as an array of lines.
///
/// The stream's text output, represented as an array of strings.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum TextUnion {
String(String),
@ -231,7 +231,7 @@ pub enum TextUnion {
}
/// The codemirror mode to use for code in this language.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum CodemirrorMode {
AnythingMap(HashMap<String, Option<Value>>),

View file

@ -0,0 +1,52 @@
---
source: crates/ruff/src/jupyter/notebook.rs
---
isort.ipynb:cell 1:1:1: I001 [*] Import block is un-sorted or un-formatted
|
1 | / from pathlib import Path
2 | | import random
3 | | import math
4 | | from typing import Any
| |_^ I001
5 | import collections
6 | # Newline should be added here
|
= help: Organize imports
Fix
1 |+import math
2 |+import random
1 3 | from pathlib import Path
2 |-import random
3 |-import math
4 4 | from typing import Any
5 5 | import collections
6 6 | # Newline should be added here
isort.ipynb:cell 2:1:1: I001 [*] Import block is un-sorted or un-formatted
|
2 | import random
3 | import math
4 | / from typing import Any
5 | | import collections
6 | | # Newline should be added here
| |_^ I001
7 | def foo():
8 | pass
|
= help: Organize imports
Fix
1 1 | from pathlib import Path
2 2 | import random
3 3 | import math
4 |+import collections
4 5 | from typing import Any
5 |-import collections
6 |+
7 |+
6 8 | # Newline should be added here
7 9 | def foo():
8 10 | pass

View file

@ -1,6 +1,6 @@
use crate::jupyter::Notebook;
#[derive(Debug, PartialEq, is_macro::Is)]
#[derive(Clone, Debug, PartialEq, is_macro::Is)]
pub enum SourceKind {
Python(String),
Jupyter(Notebook),

View file

@ -15,12 +15,25 @@ use ruff_python_ast::source_code::{Indexer, Locator, SourceFileBuilder, Stylist}
use crate::autofix::{fix_file, FixResult};
use crate::directives;
use crate::jupyter::Notebook;
use crate::linter::{check_path, LinterResult};
use crate::message::{Emitter, EmitterContext, Message, TextEmitter};
use crate::packaging::detect_package_root;
use crate::registry::AsRule;
use crate::rules::pycodestyle::rules::syntax_error;
use crate::settings::{flags, Settings};
use crate::source_kind::SourceKind;
#[cfg(not(fuzzing))]
fn read_jupyter_notebook(path: &Path) -> Result<Notebook> {
Notebook::read(path).map_err(|err| {
anyhow::anyhow!(
"Failed to read notebook file `{}`: {:?}",
path.display(),
err
)
})
}
#[cfg(not(fuzzing))]
pub(crate) fn test_resource_path(path: impl AsRef<Path>) -> std::path::PathBuf {
@ -32,14 +45,41 @@ pub(crate) fn test_resource_path(path: impl AsRef<Path>) -> std::path::PathBuf {
pub(crate) fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<Vec<Message>> {
let path = test_resource_path("fixtures").join(path);
let contents = std::fs::read_to_string(&path)?;
Ok(test_contents(&contents, &path, settings))
Ok(test_contents(
&mut SourceKind::Python(contents),
&path,
settings,
))
}
#[cfg(not(fuzzing))]
pub(crate) fn test_notebook_path(
path: impl AsRef<Path>,
expected: impl AsRef<Path>,
settings: &Settings,
) -> Result<(Vec<Message>, SourceKind)> {
let path = test_resource_path("fixtures/jupyter").join(path);
let mut source_kind = SourceKind::Jupyter(read_jupyter_notebook(&path)?);
let messages = test_contents(&mut source_kind, &path, settings);
let expected_notebook =
read_jupyter_notebook(&test_resource_path("fixtures/jupyter").join(expected))?;
if let SourceKind::Jupyter(notebook) = &source_kind {
assert_eq!(notebook.cell_offsets(), expected_notebook.cell_offsets());
assert_eq!(notebook.index(), expected_notebook.index());
assert_eq!(notebook.content(), expected_notebook.content());
};
Ok((messages, source_kind))
}
/// Run [`check_path`] on a snippet of Python code.
pub fn test_snippet(contents: &str, settings: &Settings) -> Vec<Message> {
let path = Path::new("<filename>");
let contents = dedent(contents);
test_contents(&contents, path, settings)
test_contents(
&mut SourceKind::Python(contents.to_string()),
path,
settings,
)
}
thread_local! {
@ -56,9 +96,10 @@ pub(crate) fn max_iterations() -> usize {
/// A convenient wrapper around [`check_path`], that additionally
/// asserts that autofixes converge after a fixed number of iterations.
fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Message> {
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
let locator = Locator::new(contents);
fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
let contents = source_kind.content().to_string();
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
let locator = Locator::new(&contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
let directives = directives::extract_directives(
@ -81,7 +122,7 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
&directives,
settings,
flags::Noqa::Enabled,
None,
Some(source_kind),
);
let source_has_errors = error.is_some();
@ -98,13 +139,14 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
while let Some(FixResult {
code: fixed_contents,
source_map,
..
}) = fix_file(&diagnostics, &Locator::new(&contents))
{
if iterations < max_iterations() {
iterations += 1;
} else {
let output = print_diagnostics(diagnostics, path, &contents);
let output = print_diagnostics(diagnostics, path, &contents, source_kind);
panic!(
"Failed to converge after {} iterations. This likely \
@ -114,6 +156,10 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
);
}
if let Some(notebook) = source_kind.as_mut_jupyter() {
notebook.update(&source_map, &fixed_contents);
};
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents);
let locator = Locator::new(&fixed_contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
@ -138,18 +184,18 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
&directives,
settings,
flags::Noqa::Enabled,
None,
Some(source_kind),
);
if let Some(fixed_error) = fixed_error {
if !source_has_errors {
// Previous fix introduced a syntax error, abort
let fixes = print_diagnostics(diagnostics, path, &contents);
let fixes = print_diagnostics(diagnostics, path, &contents, source_kind);
let mut syntax_diagnostics = Vec::new();
syntax_error(&mut syntax_diagnostics, &fixed_error, &locator);
let syntax_errors =
print_diagnostics(syntax_diagnostics, path, &fixed_contents);
print_diagnostics(syntax_diagnostics, path, &fixed_contents, source_kind);
panic!(
r#"Fixed source has a syntax error where the source document does not. This is a bug in one of the generated fixes:
@ -169,7 +215,7 @@ Source with applied fixes:
let source_code = SourceFileBuilder::new(
path.file_name().unwrap().to_string_lossy().as_ref(),
contents,
contents.as_str(),
)
.finish();
@ -203,12 +249,14 @@ Source with applied fixes:
.collect()
}
fn print_diagnostics(diagnostics: Vec<Diagnostic>, file_path: &Path, source: &str) -> String {
let source_file = SourceFileBuilder::new(
file_path.file_name().unwrap().to_string_lossy().as_ref(),
source,
)
.finish();
fn print_diagnostics(
diagnostics: Vec<Diagnostic>,
file_path: &Path,
source: &str,
source_kind: &SourceKind,
) -> String {
let filename = file_path.file_name().unwrap().to_string_lossy();
let source_file = SourceFileBuilder::new(filename.as_ref(), source).finish();
let messages: Vec<_> = diagnostics
.into_iter()
@ -219,7 +267,35 @@ fn print_diagnostics(diagnostics: Vec<Diagnostic>, file_path: &Path, source: &st
})
.collect();
print_messages(&messages)
if source_kind.is_jupyter() {
print_jupyter_messages(&messages, &filename, source_kind)
} else {
print_messages(&messages)
}
}
pub(crate) fn print_jupyter_messages(
messages: &[Message],
filename: &str,
source_kind: &SourceKind,
) -> String {
let mut output = Vec::new();
TextEmitter::default()
.with_show_fix_status(true)
.with_show_fix_diff(true)
.with_show_source(true)
.emit(
&mut output,
messages,
&EmitterContext::new(&FxHashMap::from_iter([(
filename.to_string(),
source_kind.clone(),
)])),
)
.unwrap();
String::from_utf8(output).unwrap()
}
pub(crate) fn print_messages(messages: &[Message]) -> String {
@ -241,6 +317,13 @@ pub(crate) fn print_messages(messages: &[Message]) -> String {
#[macro_export]
macro_rules! assert_messages {
($value:expr, $path:expr, $source_kind:expr) => {{
insta::with_settings!({ omit_expression => true }, {
insta::assert_snapshot!(
$crate::test::print_jupyter_messages(&$value, &$path, &$source_kind)
);
});
}};
($value:expr, @$snapshot:literal) => {{
insta::with_settings!({ omit_expression => true }, {
insta::assert_snapshot!($crate::test::print_messages(&$value), $snapshot);