Create ruff_notebook crate (#7039)

## Summary

This PR moves `ruff/jupyter` into its own `ruff_notebook` crate. Beyond
the move itself, there were a few challenges:

1. `ruff_notebook` relies on the source map abstraction. I've moved the
source map into `ruff_diagnostics`, since it has no dependencies of its
own and is used alongside diagnostics (the resulting API is sketched
after this list).
2. `ruff_notebook` has a couple of tests for end-to-end linting and
autofixing. I had to leave those tests in `ruff` itself.
3. `ruff/jupyter` contained code that relied on Python lexing to provide
a more targeted error message when a user saves a `.py` file with a
`.ipynb` extension. I removed this to avoid a dependency on the parser;
the check didn't feel worth retaining at the cost of that dependency.
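
For item 1, here's a minimal sketch of the `SourceMap`/`SourceMarker` API as it is now exported from `ruff_diagnostics`. The `Edit::replacement` constructor and the exact end-marker offsets are assumptions based on the existing crate, not something this diff adds:

```rust
use ruff_diagnostics::{Edit, SourceMap, SourceMarker};
use ruff_text_size::TextSize;

fn sketch() {
    // Replace the range 10..21 in the original source with a 5-character string.
    let edit = Edit::replacement("fixed".to_string(), 10.into(), 21.into());

    // Record how the edit shifts offsets between the original and fixed source.
    let mut source_map = SourceMap::default();
    source_map.push_start_marker(&edit, TextSize::from(10u32)); // output length before the edit
    source_map.push_end_marker(&edit, TextSize::from(15u32)); // output length after the edit

    // Each marker pairs a position in the original source with a position in the output.
    assert_eq!(
        source_map.markers(),
        &[
            SourceMarker::new(10.into(), 10.into()),
            SourceMarker::new(21.into(), 15.into()),
        ]
    );
}
```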

## Test Plan

`cargo test`
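
Most of the moved notebook coverage now lives in `crates/ruff/src/linter.rs`. For reference, the round-trip those tests exercise goes through the new `ruff_notebook` API roughly like this (a minimal sketch; the fixture path is illustrative):

```rust
use std::path::Path;

use ruff_notebook::Notebook;

fn roundtrip() -> anyhow::Result<()> {
    // Illustrative fixture path; the real fixtures live under
    // `crates/ruff_notebook/resources/test/fixtures/jupyter`.
    let path = Path::new("crates/ruff_notebook/resources/test/fixtures/jupyter/isort.ipynb");
    let notebook = Notebook::from_path(path)?;

    // `Notebook::write` now takes a writer rather than a path, so tests write
    // into a `Vec<u8>` while the CLI wraps a `File` in a `BufWriter`.
    let mut buffer = Vec::new();
    notebook.write(&mut buffer)?;

    let output = String::from_utf8(buffer)?;
    assert_eq!(output.ends_with('\n'), notebook.trailing_newline());
    Ok(())
}
```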
Charlie Marsh 2023-09-01 14:56:44 +01:00 committed by GitHub
parent 08e246764f
commit afcd00da56
48 changed files with 274 additions and 253 deletions


@ -129,6 +129,7 @@ At time of writing, the repository includes the following crates:
intermediate representation. The backend for `ruff_python_formatter`.
- `crates/ruff_index`: library crate inspired by `rustc_index`.
- `crates/ruff_macros`: proc macro crate containing macros used by Ruff.
- `crates/ruff_notebook`: library crate for parsing and manipulating Jupyter notebooks.
- `crates/ruff_python_ast`: library crate containing Python-specific AST types and utilities.
- `crates/ruff_python_codegen`: library crate containing utilities for generating Python source code.
- `crates/ruff_python_formatter`: library crate implementing the Python formatter. Emits an

Cargo.lock generated

@ -2088,6 +2088,7 @@ dependencies = [
"ruff_diagnostics",
"ruff_index",
"ruff_macros",
"ruff_notebook",
"ruff_python_ast",
"ruff_python_codegen",
"ruff_python_index",
@ -2103,7 +2104,6 @@ dependencies = [
"semver",
"serde",
"serde_json",
"serde_with",
"similar",
"smallvec",
"strum",
@ -2115,7 +2115,6 @@ dependencies = [
"typed-arena",
"unicode-width",
"unicode_names2",
"uuid",
"wsl",
]
@ -2185,6 +2184,7 @@ dependencies = [
"ruff_diagnostics",
"ruff_formatter",
"ruff_macros",
"ruff_notebook",
"ruff_python_ast",
"ruff_python_formatter",
"ruff_python_stdlib",
@ -2227,6 +2227,7 @@ dependencies = [
"ruff_cli",
"ruff_diagnostics",
"ruff_formatter",
"ruff_notebook",
"ruff_python_ast",
"ruff_python_codegen",
"ruff_python_formatter",
@ -2292,6 +2293,25 @@ dependencies = [
"syn 2.0.29",
]
[[package]]
name = "ruff_notebook"
version = "0.0.0"
dependencies = [
"anyhow",
"insta",
"itertools",
"once_cell",
"ruff_diagnostics",
"ruff_source_file",
"ruff_text_size",
"serde",
"serde_json",
"serde_with",
"test-case",
"thiserror",
"uuid",
]
[[package]]
name = "ruff_python_ast"
version = "0.0.0"


@ -18,6 +18,7 @@ name = "ruff"
ruff_cache = { path = "../ruff_cache" }
ruff_diagnostics = { path = "../ruff_diagnostics", features = ["serde"] }
ruff_index = { path = "../ruff_index" }
ruff_notebook = { path = "../ruff_notebook" }
ruff_macros = { path = "../ruff_macros" }
ruff_python_ast = { path = "../ruff_python_ast", features = ["serde"] }
ruff_python_codegen = { path = "../ruff_python_codegen" }
@ -64,17 +65,15 @@ schemars = { workspace = true, optional = true }
semver = { version = "1.0.16" }
serde = { workspace = true }
serde_json = { workspace = true }
serde_with = { version = "3.0.0" }
similar = { workspace = true }
smallvec = { workspace = true }
strum = { workspace = true }
strum_macros = { workspace = true }
thiserror = { version = "1.0.43" }
thiserror = { workspace = true }
toml = { workspace = true }
typed-arena = { version = "2.0.2" }
unicode-width = { workspace = true }
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
uuid = { workspace = true, features = ["v4", "fast-rng", "macro-diagnostics", "js"] }
wsl = { version = "0.1.0" }
[dev-dependencies]


@ -4,17 +4,15 @@ use std::collections::BTreeSet;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use rustc_hash::{FxHashMap, FxHashSet};
use ruff_diagnostics::{Diagnostic, Edit, Fix, IsolationLevel};
use ruff_diagnostics::{Diagnostic, Edit, Fix, IsolationLevel, SourceMap};
use ruff_source_file::Locator;
use crate::autofix::source_map::SourceMap;
use crate::linter::FixTable;
use crate::registry::{AsRule, Rule};
pub(crate) mod codemods;
pub(crate) mod edits;
pub(crate) mod snippet;
pub(crate) mod source_map;
pub(crate) struct FixResult {
/// The resulting source code, after applying all fixes.
@ -140,10 +138,9 @@ fn cmp_fix(rule1: Rule, rule2: Rule, fix1: &Fix, fix2: &Fix) -> std::cmp::Orderi
mod tests {
use ruff_text_size::{Ranged, TextSize};
use ruff_diagnostics::{Diagnostic, Edit, Fix};
use ruff_diagnostics::{Diagnostic, Edit, Fix, SourceMarker};
use ruff_source_file::Locator;
use crate::autofix::source_map::SourceMarker;
use crate::autofix::{apply_fixes, FixResult};
use crate::rules::pycodestyle::rules::MissingNewlineAtEndOfFile;
@ -207,14 +204,8 @@ print("hello world")
assert_eq!(
source_map.markers(),
&[
SourceMarker {
source: 10.into(),
dest: 10.into(),
},
SourceMarker {
source: 10.into(),
dest: 21.into(),
},
SourceMarker::new(10.into(), 10.into(),),
SourceMarker::new(10.into(), 21.into(),),
]
);
}
@ -250,14 +241,8 @@ class A(Bar):
assert_eq!(
source_map.markers(),
&[
SourceMarker {
source: 8.into(),
dest: 8.into(),
},
SourceMarker {
source: 14.into(),
dest: 11.into(),
},
SourceMarker::new(8.into(), 8.into(),),
SourceMarker::new(14.into(), 11.into(),),
]
);
}
@ -289,14 +274,8 @@ class A:
assert_eq!(
source_map.markers(),
&[
SourceMarker {
source: 7.into(),
dest: 7.into()
},
SourceMarker {
source: 15.into(),
dest: 7.into()
}
SourceMarker::new(7.into(), 7.into()),
SourceMarker::new(15.into(), 7.into()),
]
);
}
@ -332,22 +311,10 @@ class A(object):
assert_eq!(
source_map.markers(),
&[
SourceMarker {
source: 8.into(),
dest: 8.into()
},
SourceMarker {
source: 16.into(),
dest: 8.into()
},
SourceMarker {
source: 22.into(),
dest: 14.into(),
},
SourceMarker {
source: 30.into(),
dest: 14.into(),
}
SourceMarker::new(8.into(), 8.into()),
SourceMarker::new(16.into(), 8.into()),
SourceMarker::new(22.into(), 14.into(),),
SourceMarker::new(30.into(), 14.into(),),
]
);
}
@ -382,14 +349,8 @@ class A:
assert_eq!(
source_map.markers(),
&[
SourceMarker {
source: 7.into(),
dest: 7.into(),
},
SourceMarker {
source: 15.into(),
dest: 7.into(),
}
SourceMarker::new(7.into(), 7.into(),),
SourceMarker::new(15.into(), 7.into(),),
]
);
}


@ -20,7 +20,6 @@ mod doc_lines;
mod docstrings;
pub mod fs;
mod importer;
pub mod jupyter;
mod lex;
pub mod line_width;
pub mod linter;


@ -6,8 +6,6 @@ use anyhow::{anyhow, Result};
use colored::Colorize;
use itertools::Itertools;
use log::error;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{AsMode, ParseError};
use rustc_hash::FxHashMap;
use ruff_diagnostics::Diagnostic;
@ -15,7 +13,8 @@ use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{AsMode, ParseError};
use ruff_source_file::{Locator, SourceFileBuilder};
use ruff_text_size::Ranged;
@ -609,3 +608,133 @@ This indicates a bug in `{}`. If you could open an issue at:
);
}
}
#[cfg(test)]
mod tests {
use std::path::Path;
use anyhow::Result;
use test_case::test_case;
use ruff_notebook::{Notebook, NotebookError};
use crate::registry::Rule;
use crate::source_kind::SourceKind;
use crate::test::{test_contents, test_notebook_path, TestedNotebook};
use crate::{assert_messages, settings};
/// Construct a path to a Jupyter notebook in the `resources/test/fixtures/jupyter` directory.
fn notebook_path(path: impl AsRef<Path>) -> std::path::PathBuf {
Path::new("../ruff_notebook/resources/test/fixtures/jupyter").join(path)
}
#[test]
fn test_import_sorting() -> Result<(), NotebookError> {
let actual = notebook_path("isort.ipynb");
let expected = notebook_path("isort_expected.ipynb");
let TestedNotebook {
messages,
source_notebook,
..
} = test_notebook_path(
&actual,
expected,
&settings::Settings::for_rule(Rule::UnsortedImports),
)?;
assert_messages!(messages, actual, source_notebook);
Ok(())
}
#[test]
fn test_ipy_escape_command() -> Result<(), NotebookError> {
let actual = notebook_path("ipy_escape_command.ipynb");
let expected = notebook_path("ipy_escape_command_expected.ipynb");
let TestedNotebook {
messages,
source_notebook,
..
} = test_notebook_path(
&actual,
expected,
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
assert_messages!(messages, actual, source_notebook);
Ok(())
}
#[test]
fn test_unused_variable() -> Result<(), NotebookError> {
let actual = notebook_path("unused_variable.ipynb");
let expected = notebook_path("unused_variable_expected.ipynb");
let TestedNotebook {
messages,
source_notebook,
..
} = test_notebook_path(
&actual,
expected,
&settings::Settings::for_rule(Rule::UnusedVariable),
)?;
assert_messages!(messages, actual, source_notebook);
Ok(())
}
#[test]
fn test_json_consistency() -> Result<()> {
let actual_path = notebook_path("before_fix.ipynb");
let expected_path = notebook_path("after_fix.ipynb");
let TestedNotebook {
linted_notebook: fixed_notebook,
..
} = test_notebook_path(
actual_path,
&expected_path,
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
let mut writer = Vec::new();
fixed_notebook.write(&mut writer)?;
let actual = String::from_utf8(writer)?;
let expected = std::fs::read_to_string(expected_path)?;
assert_eq!(actual, expected);
Ok(())
}
#[test_case(Path::new("before_fix.ipynb"), true; "trailing_newline")]
#[test_case(Path::new("no_trailing_newline.ipynb"), false; "no_trailing_newline")]
fn test_trailing_newline(path: &Path, trailing_newline: bool) -> Result<()> {
let notebook = Notebook::from_path(&notebook_path(path))?;
assert_eq!(notebook.trailing_newline(), trailing_newline);
let mut writer = Vec::new();
notebook.write(&mut writer)?;
let string = String::from_utf8(writer)?;
assert_eq!(string.ends_with('\n'), trailing_newline);
Ok(())
}
// Version <4.5, don't emit cell ids
#[test_case(Path::new("no_cell_id.ipynb"), false; "no_cell_id")]
// Version 4.5, cell ids are missing and need to be added
#[test_case(Path::new("add_missing_cell_id.ipynb"), true; "add_missing_cell_id")]
fn test_cell_id(path: &Path, has_id: bool) -> Result<()> {
let source_notebook = Notebook::from_path(&notebook_path(path))?;
let source_kind = SourceKind::IpyNotebook(source_notebook);
let (_, transformed) = test_contents(
&source_kind,
path,
&settings::Settings::for_rule(Rule::UnusedImport),
);
let linted_notebook = transformed.into_owned().expect_ipy_notebook();
let mut writer = Vec::new();
linted_notebook.write(&mut writer)?;
let actual = String::from_utf8(writer)?;
if has_id {
assert!(actual.contains(r#""id": ""#));
} else {
assert!(!actual.contains(r#""id":"#));
}
Ok(())
}
}


@ -12,8 +12,8 @@ use ruff_python_parser::{ParseError, ParseErrorType};
use ruff_source_file::{OneIndexed, SourceCode, SourceLocation};
use crate::fs;
use crate::jupyter::Notebook;
use crate::source_kind::SourceKind;
use ruff_notebook::Notebook;
pub static WARNINGS: Lazy<Mutex<Vec<&'static str>>> = Lazy::new(Mutex::default);


@ -4,10 +4,10 @@ use std::num::NonZeroUsize;
use colored::Colorize;
use ruff_notebook::{Notebook, NotebookIndex};
use ruff_source_file::OneIndexed;
use crate::fs::relativize_path;
use crate::jupyter::{Notebook, NotebookIndex};
use crate::message::diff::calculate_print_width;
use crate::message::text::{MessageCodeFrame, RuleCodeAndBody};
use crate::message::{


@ -14,12 +14,11 @@ pub use json_lines::JsonLinesEmitter;
pub use junit::JunitEmitter;
pub use pylint::PylintEmitter;
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
use ruff_notebook::Notebook;
use ruff_source_file::{SourceFile, SourceLocation};
use ruff_text_size::{Ranged, TextRange, TextSize};
pub use text::TextEmitter;
use crate::jupyter::Notebook;
mod azure;
mod diff;
mod github;


@ -7,11 +7,11 @@ use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, Sou
use bitflags::bitflags;
use colored::Colorize;
use ruff_notebook::{Notebook, NotebookIndex};
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::fs::relativize_path;
use crate::jupyter::{Notebook, NotebookIndex};
use crate::line_width::{LineWidthBuilder, TabSize};
use crate::message::diff::Diff;
use crate::message::{Emitter, EmitterContext, Message};


@ -1,13 +1,13 @@
use ruff_python_ast::{self as ast, ElifElseClause, ExceptHandler, MatchCase, Stmt};
use ruff_text_size::{Ranged, TextRange, TextSize};
use std::iter::Peekable;
use std::slice;
use ruff_notebook::Notebook;
use ruff_python_ast::statement_visitor::StatementVisitor;
use ruff_python_ast::{self as ast, ElifElseClause, ExceptHandler, MatchCase, Stmt};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::directives::IsortDirectives;
use crate::jupyter::Notebook;
use crate::rules::isort::helpers;
use crate::source_kind::SourceKind;


@ -1,5 +1,5 @@
---
source: crates/ruff/src/jupyter/notebook.rs
source: crates/ruff/src/linter.rs
---
isort.ipynb:cell 1:1:1: I001 [*] Import block is un-sorted or un-formatted
|


@ -1,5 +1,5 @@
---
source: crates/ruff/src/jupyter/notebook.rs
source: crates/ruff/src/linter.rs
---
ipy_escape_command.ipynb:cell 1:5:8: F401 [*] `os` imported but unused
|


@ -1,5 +1,5 @@
---
source: crates/ruff/src/jupyter/notebook.rs
source: crates/ruff/src/linter.rs
---
unused_variable.ipynb:cell 1:2:5: F841 [*] Local variable `foo1` is assigned to but never used
|


@ -1,5 +1,5 @@
use crate::autofix::source_map::SourceMap;
use crate::jupyter::Notebook;
use ruff_diagnostics::SourceMap;
use ruff_notebook::Notebook;
#[derive(Clone, Debug, PartialEq, is_macro::Is)]
pub enum SourceKind {


@ -21,7 +21,6 @@ use ruff_text_size::Ranged;
use crate::autofix::{fix_file, FixResult};
use crate::directives;
use crate::jupyter::{Notebook, NotebookError};
use crate::linter::{check_path, LinterResult};
use crate::message::{Emitter, EmitterContext, Message, TextEmitter};
use crate::packaging::detect_package_root;
@ -29,6 +28,7 @@ use crate::registry::AsRule;
use crate::rules::pycodestyle::rules::syntax_error;
use crate::settings::{flags, Settings};
use crate::source_kind::SourceKind;
use ruff_notebook::{Notebook, NotebookError};
#[cfg(not(fuzzing))]
pub(crate) fn test_resource_path(path: impl AsRef<Path>) -> std::path::PathBuf {


@ -25,6 +25,7 @@ ruff = { path = "../ruff", features = ["clap"] }
ruff_cache = { path = "../ruff_cache" }
ruff_diagnostics = { path = "../ruff_diagnostics" }
ruff_formatter = { path = "../ruff_formatter" }
ruff_notebook = { path = "../ruff_notebook" }
ruff_macros = { path = "../ruff_macros" }
ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_formatter = { path = "../ruff_python_formatter" }


@ -1,8 +1,8 @@
#![cfg_attr(target_family = "wasm", allow(dead_code))]
use std::fs::write;
use std::fs::{write, File};
use std::io;
use std::io::Write;
use std::io::{BufWriter, Write};
use std::ops::AddAssign;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
@ -16,7 +16,6 @@ use rustc_hash::FxHashMap;
use similar::TextDiff;
use thiserror::Error;
use ruff::jupyter::{Cell, Notebook, NotebookError};
use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff::logging::DisplayParseError;
use ruff::message::Message;
@ -27,6 +26,7 @@ use ruff::source_kind::SourceKind;
use ruff::{fs, IOError, SyntaxError};
use ruff_diagnostics::Diagnostic;
use ruff_macros::CacheKey;
use ruff_notebook::{Cell, Notebook, NotebookError};
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::{PySourceType, SourceType, TomlSourceType};
use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder};
@ -243,7 +243,8 @@ pub(crate) fn lint_path(
write(path, transformed.as_bytes())?;
}
SourceKind::IpyNotebook(notebook) => {
notebook.write(path)?;
let mut writer = BufWriter::new(File::create(path)?);
notebook.write(&mut writer)?;
}
},
flags::FixMode::Diff => {
@ -565,8 +566,7 @@ impl From<&SourceExtractionError> for Diagnostic {
}
// Syntax errors.
SourceExtractionError::Notebook(
NotebookError::PythonSource(_)
| NotebookError::InvalidJson(_)
NotebookError::InvalidJson(_)
| NotebookError::InvalidSchema(_)
| NotebookError::InvalidFormat(_),
) => Diagnostic::new(


@ -18,6 +18,7 @@ ruff_formatter = { path = "../ruff_formatter" }
ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_codegen = { path = "../ruff_python_codegen" }
ruff_python_formatter = { path = "../ruff_python_formatter" }
ruff_notebook = { path = "../ruff_notebook" }
ruff_python_literal = { path = "../ruff_python_literal" }
ruff_python_parser = { path = "../ruff_python_parser" }
ruff_python_stdlib = { path = "../ruff_python_stdlib" }


@ -6,7 +6,6 @@ use std::path::PathBuf;
use anyhow::Result;
use ruff::jupyter;
use ruff_python_codegen::round_trip;
use ruff_python_stdlib::path::is_jupyter_notebook;
@ -20,7 +19,7 @@ pub(crate) struct Args {
pub(crate) fn main(args: &Args) -> Result<()> {
let path = args.file.as_path();
if is_jupyter_notebook(path) {
println!("{}", jupyter::round_trip(path)?);
println!("{}", ruff_notebook::round_trip(path)?);
} else {
let contents = fs::read_to_string(&args.file)?;
println!("{}", round_trip(&contents, &args.file.to_string_lossy())?);


@ -1,9 +1,11 @@
pub use diagnostic::{Diagnostic, DiagnosticKind};
pub use edit::Edit;
pub use fix::{Applicability, Fix, IsolationLevel};
pub use source_map::{SourceMap, SourceMarker};
pub use violation::{AlwaysAutofixableViolation, AutofixKind, Violation};
mod diagnostic;
mod edit;
mod fix;
mod source_map;
mod violation;


@ -1,15 +1,29 @@
use ruff_text_size::{Ranged, TextSize};
use ruff_diagnostics::Edit;
use crate::Edit;
/// Lightweight sourcemap marker representing the source and destination
/// position for an [`Edit`].
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct SourceMarker {
pub struct SourceMarker {
/// Position of the marker in the original source.
pub(crate) source: TextSize,
source: TextSize,
/// Position of the marker in the transformed code.
pub(crate) dest: TextSize,
dest: TextSize,
}
impl SourceMarker {
pub fn new(source: TextSize, dest: TextSize) -> Self {
Self { source, dest }
}
pub const fn source(&self) -> TextSize {
self.source
}
pub const fn dest(&self) -> TextSize {
self.dest
}
}
/// A collection of [`SourceMarker`].
@ -18,12 +32,12 @@ pub(crate) struct SourceMarker {
/// the transformed code. Here, only the boundaries of edits are tracked instead
/// of every single character.
#[derive(Default, PartialEq, Eq)]
pub(crate) struct SourceMap(Vec<SourceMarker>);
pub struct SourceMap(Vec<SourceMarker>);
impl SourceMap {
/// Returns a slice of all the markers in the sourcemap in the order they
/// were added.
pub(crate) fn markers(&self) -> &[SourceMarker] {
pub fn markers(&self) -> &[SourceMarker] {
&self.0
}
@ -31,7 +45,7 @@ impl SourceMap {
///
/// The `output_length` is the length of the transformed string before the
/// edit is applied.
pub(crate) fn push_start_marker(&mut self, edit: &Edit, output_length: TextSize) {
pub fn push_start_marker(&mut self, edit: &Edit, output_length: TextSize) {
self.0.push(SourceMarker {
source: edit.start(),
dest: output_length,
@ -42,7 +56,7 @@ impl SourceMap {
///
/// The `output_length` is the length of the transformed string after the
/// edit has been applied.
pub(crate) fn push_end_marker(&mut self, edit: &Edit, output_length: TextSize) {
pub fn push_end_marker(&mut self, edit: &Edit, output_length: TextSize) {
if edit.is_insertion() {
self.0.push(SourceMarker {
source: edit.start(),


@ -0,0 +1,31 @@
[package]
name = "ruff_notebook"
version = "0.0.0"
publish = false
authors = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
[lib]
[dependencies]
ruff_diagnostics = { path = "../ruff_diagnostics" }
ruff_source_file = { path = "../ruff_source_file" }
ruff_text_size = { path = "../ruff_text_size" }
anyhow = { workspace = true }
itertools = { workspace = true }
once_cell = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
serde_with = { version = "3.0.0" }
thiserror = { workspace = true }
uuid = { workspace = true }
[dev-dependencies]
insta = { workspace = true }
test-case = { workspace = true }


@ -1,7 +1,7 @@
use std::cmp::Ordering;
use std::fmt::Display;
use std::fs::File;
use std::io::{BufReader, BufWriter, Cursor, Read, Seek, SeekFrom, Write};
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
use std::path::Path;
use std::{io, iter};
@ -12,14 +12,12 @@ use serde_json::error::Category;
use thiserror::Error;
use uuid::Uuid;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::Mode;
use ruff_diagnostics::{SourceMap, SourceMarker};
use ruff_source_file::{NewlineWithTrailingNewline, UniversalNewlineIterator};
use ruff_text_size::TextSize;
use crate::autofix::source_map::{SourceMap, SourceMarker};
use crate::jupyter::index::NotebookIndex;
use crate::jupyter::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
use crate::index::NotebookIndex;
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
/// Run round-trip source code generation on a given Jupyter notebook file path.
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
@ -33,7 +31,7 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
let code = notebook.source_code().to_string();
notebook.update_cell_content(&code);
let mut writer = Vec::new();
notebook.write_inner(&mut writer)?;
notebook.write(&mut writer)?;
Ok(String::from_utf8(writer)?)
}
@ -99,8 +97,6 @@ pub enum NotebookError {
Io(#[from] io::Error),
#[error(transparent)]
Json(serde_json::Error),
#[error("Expected a Jupyter Notebook, which must be internally stored as JSON, but found a Python source file: {0}")]
PythonSource(serde_json::Error),
#[error("Expected a Jupyter Notebook, which must be internally stored as JSON, but this file isn't valid JSON: {0}")]
InvalidJson(serde_json::Error),
#[error("This file does not match the schema expected of Jupyter Notebooks: {0}")]
@ -162,24 +158,10 @@ impl Notebook {
// Translate the error into a diagnostic
return Err(match err.classify() {
Category::Io => NotebookError::Json(err),
Category::Syntax | Category::Eof => {
// Maybe someone saved the python sources (those with the `# %%` separator)
// as jupyter notebook instead. Let's help them.
let mut contents = String::new();
reader
.rewind()
.and_then(|_| reader.read_to_string(&mut contents))?;
// Check if tokenizing was successful and the file is non-empty
if lex(&contents, Mode::Module).any(|result| result.is_err()) {
NotebookError::InvalidJson(err)
} else {
NotebookError::PythonSource(err)
}
}
Category::Syntax | Category::Eof => NotebookError::InvalidJson(err),
Category::Data => {
// We could try to read the schema version here but if this fails it's
// a bug anyway
// a bug anyway.
NotebookError::InvalidSchema(err)
}
});
@ -256,13 +238,13 @@ impl Notebook {
// The first offset is always going to be at 0, so skip it.
for offset in self.cell_offsets.iter_mut().skip(1).rev() {
let closest_marker = match last_marker {
Some(marker) if marker.source <= *offset => marker,
Some(marker) if marker.source() <= *offset => marker,
_ => {
let Some(marker) = source_map
.markers()
.iter()
.rev()
.find(|m| m.source <= *offset)
.find(|marker| marker.source() <= *offset)
else {
// There are no markers above the current offset, so we can
// stop here.
@ -273,9 +255,9 @@ impl Notebook {
}
};
match closest_marker.source.cmp(&closest_marker.dest) {
Ordering::Less => *offset += closest_marker.dest - closest_marker.source,
Ordering::Greater => *offset -= closest_marker.source - closest_marker.dest,
match closest_marker.source().cmp(&closest_marker.dest()) {
Ordering::Less => *offset += closest_marker.dest() - closest_marker.source(),
Ordering::Greater => *offset -= closest_marker.source() - closest_marker.dest(),
Ordering::Equal => (),
}
}
@ -383,18 +365,23 @@ impl Notebook {
/// The index is built only once when required. This is only used to
/// report diagnostics, so by that time all of the autofixes must have
/// been applied if `--fix` was passed.
pub(crate) fn index(&self) -> &NotebookIndex {
pub fn index(&self) -> &NotebookIndex {
self.index.get_or_init(|| self.build_index())
}
/// Return the cell offsets for the concatenated source code corresponding
/// the Jupyter notebook.
pub(crate) fn cell_offsets(&self) -> &[TextSize] {
pub fn cell_offsets(&self) -> &[TextSize] {
&self.cell_offsets
}
/// Return `true` if the notebook has a trailing newline, `false` otherwise.
pub fn trailing_newline(&self) -> bool {
self.trailing_newline
}
/// Update the notebook with the given sourcemap and transformed content.
pub(crate) fn update(&mut self, source_map: &SourceMap, transformed: String) {
pub fn update(&mut self, source_map: &SourceMap, transformed: String) {
// Cell offsets must be updated before updating the cell content as
// it depends on the offsets to extract the cell content.
self.index.take();
@ -417,7 +404,8 @@ impl Notebook {
.map_or(true, |language| language.name == "python")
}
fn write_inner(&self, writer: &mut impl Write) -> anyhow::Result<()> {
/// Write the notebook back to the given [`Write`] implementor.
pub fn write(&self, writer: &mut dyn Write) -> anyhow::Result<()> {
// https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut serializer = serde_json::Serializer::with_formatter(writer, formatter);
@ -427,13 +415,6 @@ impl Notebook {
}
Ok(())
}
/// Write back with an indent of 1, just like black
pub fn write(&self, path: &Path) -> anyhow::Result<()> {
let mut writer = BufWriter::new(File::create(path)?);
self.write_inner(&mut writer)?;
Ok(())
}
}
#[cfg(test)]
@ -443,17 +424,11 @@ mod tests {
use anyhow::Result;
use test_case::test_case;
use crate::jupyter::index::NotebookIndex;
use crate::jupyter::schema::Cell;
use crate::jupyter::{Notebook, NotebookError};
use crate::registry::Rule;
use crate::source_kind::SourceKind;
use crate::test::{test_contents, test_notebook_path, test_resource_path, TestedNotebook};
use crate::{assert_messages, settings};
use crate::{Cell, Notebook, NotebookError, NotebookIndex};
/// Construct a path to a Jupyter notebook in the `resources/test/fixtures/jupyter` directory.
fn notebook_path(path: impl AsRef<Path>) -> std::path::PathBuf {
test_resource_path("fixtures/jupyter").join(path)
Path::new("./resources/test/fixtures/jupyter").join(path)
}
#[test]
@ -474,7 +449,7 @@ mod tests {
fn test_invalid() {
assert!(matches!(
Notebook::from_path(&notebook_path("invalid_extension.ipynb")),
Err(NotebookError::PythonSource(_))
Err(NotebookError::InvalidJson(_))
));
assert!(matches!(
Notebook::from_path(&notebook_path("not_json.ipynb")),
@ -545,114 +520,4 @@ print("after empty cells")
);
Ok(())
}
#[test]
fn test_import_sorting() -> Result<(), NotebookError> {
let actual = notebook_path("isort.ipynb");
let expected = notebook_path("isort_expected.ipynb");
let TestedNotebook {
messages,
source_notebook,
..
} = test_notebook_path(
&actual,
expected,
&settings::Settings::for_rule(Rule::UnsortedImports),
)?;
assert_messages!(messages, actual, source_notebook);
Ok(())
}
#[test]
fn test_ipy_escape_command() -> Result<(), NotebookError> {
let actual = notebook_path("ipy_escape_command.ipynb");
let expected = notebook_path("ipy_escape_command_expected.ipynb");
let TestedNotebook {
messages,
source_notebook,
..
} = test_notebook_path(
&actual,
expected,
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
assert_messages!(messages, actual, source_notebook);
Ok(())
}
#[test]
fn test_unused_variable() -> Result<(), NotebookError> {
let actual = notebook_path("unused_variable.ipynb");
let expected = notebook_path("unused_variable_expected.ipynb");
let TestedNotebook {
messages,
source_notebook,
..
} = test_notebook_path(
&actual,
expected,
&settings::Settings::for_rule(Rule::UnusedVariable),
)?;
assert_messages!(messages, actual, source_notebook);
Ok(())
}
#[test]
fn test_json_consistency() -> Result<()> {
let actual_path = notebook_path("before_fix.ipynb");
let expected_path = notebook_path("after_fix.ipynb");
let TestedNotebook {
linted_notebook: fixed_notebook,
..
} = test_notebook_path(
actual_path,
&expected_path,
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
let mut writer = Vec::new();
fixed_notebook.write_inner(&mut writer)?;
let actual = String::from_utf8(writer)?;
let expected = std::fs::read_to_string(expected_path)?;
assert_eq!(actual, expected);
Ok(())
}
#[test_case(Path::new("before_fix.ipynb"), true; "trailing_newline")]
#[test_case(Path::new("no_trailing_newline.ipynb"), false; "no_trailing_newline")]
fn test_trailing_newline(path: &Path, trailing_newline: bool) -> Result<()> {
let notebook = Notebook::from_path(&notebook_path(path))?;
assert_eq!(notebook.trailing_newline, trailing_newline);
let mut writer = Vec::new();
notebook.write_inner(&mut writer)?;
let string = String::from_utf8(writer)?;
assert_eq!(string.ends_with('\n'), trailing_newline);
Ok(())
}
// Version <4.5, don't emit cell ids
#[test_case(Path::new("no_cell_id.ipynb"), false; "no_cell_id")]
// Version 4.5, cell ids are missing and need to be added
#[test_case(Path::new("add_missing_cell_id.ipynb"), true; "add_missing_cell_id")]
fn test_cell_id(path: &Path, has_id: bool) -> Result<()> {
let source_notebook = Notebook::from_path(&notebook_path(path))?;
let source_kind = SourceKind::IpyNotebook(source_notebook);
let (_, transformed) = test_contents(
&source_kind,
path,
&settings::Settings::for_rule(Rule::UnusedImport),
);
let linted_notebook = transformed.into_owned().expect_ipy_notebook();
let mut writer = Vec::new();
linted_notebook.write_inner(&mut writer)?;
let actual = String::from_utf8(writer)?;
if has_id {
assert!(actual.contains(r#""id": ""#));
} else {
assert!(!actual.contains(r#""id":"#));
}
Ok(())
}
}


@ -46,7 +46,7 @@ fn sort_alphabetically<T: Serialize, S: serde::Serializer>(
///
/// use serde::Serialize;
///
/// use ruff::jupyter::SortAlphabetically;
/// use ruff_notebook::SortAlphabetically;
///
/// #[derive(Serialize)]
/// struct MyStruct {