mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-22 11:24:35 +00:00
Add NotebookIndex
to the cache (#6863)
## Summary This PR updates the `FileCache` to include an optional `NotebookIndex` to support caching for Jupyter Notebooks. We only require the index to compute the diagnostics and thus we don't really need to store the entire `Notebook` on the `Diagnostics` struct. This means we only need the index to be stored in the cache to reconstruct the `Diagnostics`. ## Test Plan Update an existing test case to run over the fixtures under `ruff_notebook` crate where there are multiple Jupyter Notebook. Locally, the following commands were run in order: 1. Remove the cache: `rm -rf .ruff_cache` 2. Run without cache: `cargo run --bin ruff -- check --isolated crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb --no-cache` 3. Run with cache: `cargo run --bin ruff -- check --isolated crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb` 4. Check whether the `.ruff_cache` directory was created or not 5. Run with cache again and verify: `cargo run --bin ruff -- check --isolated crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb` ## Benchmarks https://github.com/astral-sh/ruff/pull/6863#issuecomment-1715675186 fixes: #6671
This commit is contained in:
parent
e7b7e4a18d
commit
ee0f1270cf
11 changed files with 84 additions and 58 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2183,6 +2183,7 @@ dependencies = [
|
|||
"similar",
|
||||
"strum",
|
||||
"tempfile",
|
||||
"test-case",
|
||||
"thiserror",
|
||||
"tikv-jemallocator",
|
||||
"tracing",
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::num::NonZeroUsize;
|
|||
|
||||
use colored::Colorize;
|
||||
|
||||
use ruff_notebook::{Notebook, NotebookIndex};
|
||||
use ruff_notebook::NotebookIndex;
|
||||
use ruff_source_file::OneIndexed;
|
||||
|
||||
use crate::fs::relativize_path;
|
||||
|
@ -65,7 +65,7 @@ impl Emitter for GroupedEmitter {
|
|||
writer,
|
||||
"{}",
|
||||
DisplayGroupedMessage {
|
||||
jupyter_index: context.notebook(message.filename()).map(Notebook::index),
|
||||
notebook_index: context.notebook_index(message.filename()),
|
||||
message,
|
||||
show_fix_status: self.show_fix_status,
|
||||
show_source: self.show_source,
|
||||
|
@ -92,7 +92,7 @@ struct DisplayGroupedMessage<'a> {
|
|||
show_source: bool,
|
||||
row_length: NonZeroUsize,
|
||||
column_length: NonZeroUsize,
|
||||
jupyter_index: Option<&'a NotebookIndex>,
|
||||
notebook_index: Option<&'a NotebookIndex>,
|
||||
}
|
||||
|
||||
impl Display for DisplayGroupedMessage<'_> {
|
||||
|
@ -110,7 +110,7 @@ impl Display for DisplayGroupedMessage<'_> {
|
|||
)?;
|
||||
|
||||
// Check if we're working on a jupyter notebook and translate positions with cell accordingly
|
||||
let (row, col) = if let Some(jupyter_index) = self.jupyter_index {
|
||||
let (row, col) = if let Some(jupyter_index) = self.notebook_index {
|
||||
write!(
|
||||
f,
|
||||
"cell {cell}{sep}",
|
||||
|
@ -150,7 +150,7 @@ impl Display for DisplayGroupedMessage<'_> {
|
|||
"{}",
|
||||
MessageCodeFrame {
|
||||
message,
|
||||
jupyter_index: self.jupyter_index
|
||||
notebook_index: self.notebook_index
|
||||
}
|
||||
)?;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ pub use json_lines::JsonLinesEmitter;
|
|||
pub use junit::JunitEmitter;
|
||||
pub use pylint::PylintEmitter;
|
||||
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
|
||||
use ruff_notebook::Notebook;
|
||||
use ruff_notebook::NotebookIndex;
|
||||
use ruff_source_file::{SourceFile, SourceLocation};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
pub use text::TextEmitter;
|
||||
|
@ -127,21 +127,21 @@ pub trait Emitter {
|
|||
|
||||
/// Context passed to [`Emitter`].
|
||||
pub struct EmitterContext<'a> {
|
||||
notebooks: &'a FxHashMap<String, Notebook>,
|
||||
notebook_indexes: &'a FxHashMap<String, NotebookIndex>,
|
||||
}
|
||||
|
||||
impl<'a> EmitterContext<'a> {
|
||||
pub fn new(notebooks: &'a FxHashMap<String, Notebook>) -> Self {
|
||||
Self { notebooks }
|
||||
pub fn new(notebook_indexes: &'a FxHashMap<String, NotebookIndex>) -> Self {
|
||||
Self { notebook_indexes }
|
||||
}
|
||||
|
||||
/// Tests if the file with `name` is a jupyter notebook.
|
||||
pub fn is_notebook(&self, name: &str) -> bool {
|
||||
self.notebooks.contains_key(name)
|
||||
self.notebook_indexes.contains_key(name)
|
||||
}
|
||||
|
||||
pub fn notebook(&self, name: &str) -> Option<&Notebook> {
|
||||
self.notebooks.get(name)
|
||||
pub fn notebook_index(&self, name: &str) -> Option<&NotebookIndex> {
|
||||
self.notebook_indexes.get(name)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -225,8 +225,8 @@ def fibonacci(n):
|
|||
emitter: &mut dyn Emitter,
|
||||
messages: &[Message],
|
||||
) -> String {
|
||||
let source_kinds = FxHashMap::default();
|
||||
let context = EmitterContext::new(&source_kinds);
|
||||
let notebook_indexes = FxHashMap::default();
|
||||
let context = EmitterContext::new(¬ebook_indexes);
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
emitter.emit(&mut output, messages, &context).unwrap();
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, Sou
|
|||
use bitflags::bitflags;
|
||||
use colored::Colorize;
|
||||
|
||||
use ruff_notebook::{Notebook, NotebookIndex};
|
||||
use ruff_notebook::NotebookIndex;
|
||||
use ruff_source_file::{OneIndexed, SourceLocation};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
|
@ -71,14 +71,14 @@ impl Emitter for TextEmitter {
|
|||
)?;
|
||||
|
||||
let start_location = message.compute_start_location();
|
||||
let jupyter_index = context.notebook(message.filename()).map(Notebook::index);
|
||||
let notebook_index = context.notebook_index(message.filename());
|
||||
|
||||
// Check if we're working on a jupyter notebook and translate positions with cell accordingly
|
||||
let diagnostic_location = if let Some(jupyter_index) = jupyter_index {
|
||||
let diagnostic_location = if let Some(notebook_index) = notebook_index {
|
||||
write!(
|
||||
writer,
|
||||
"cell {cell}{sep}",
|
||||
cell = jupyter_index
|
||||
cell = notebook_index
|
||||
.cell(start_location.row.get())
|
||||
.unwrap_or_default(),
|
||||
sep = ":".cyan(),
|
||||
|
@ -86,7 +86,7 @@ impl Emitter for TextEmitter {
|
|||
|
||||
SourceLocation {
|
||||
row: OneIndexed::new(
|
||||
jupyter_index
|
||||
notebook_index
|
||||
.cell_row(start_location.row.get())
|
||||
.unwrap_or(1) as usize,
|
||||
)
|
||||
|
@ -115,7 +115,7 @@ impl Emitter for TextEmitter {
|
|||
"{}",
|
||||
MessageCodeFrame {
|
||||
message,
|
||||
jupyter_index
|
||||
notebook_index
|
||||
}
|
||||
)?;
|
||||
}
|
||||
|
@ -161,7 +161,7 @@ impl Display for RuleCodeAndBody<'_> {
|
|||
|
||||
pub(super) struct MessageCodeFrame<'a> {
|
||||
pub(crate) message: &'a Message,
|
||||
pub(crate) jupyter_index: Option<&'a NotebookIndex>,
|
||||
pub(crate) notebook_index: Option<&'a NotebookIndex>,
|
||||
}
|
||||
|
||||
impl Display for MessageCodeFrame<'_> {
|
||||
|
@ -186,14 +186,12 @@ impl Display for MessageCodeFrame<'_> {
|
|||
let content_start_index = source_code.line_index(range.start());
|
||||
let mut start_index = content_start_index.saturating_sub(2);
|
||||
|
||||
// If we're working on a jupyter notebook, skip the lines which are
|
||||
// If we're working with a Jupyter Notebook, skip the lines which are
|
||||
// outside of the cell containing the diagnostic.
|
||||
if let Some(jupyter_index) = self.jupyter_index {
|
||||
let content_start_cell = jupyter_index
|
||||
.cell(content_start_index.get())
|
||||
.unwrap_or_default();
|
||||
if let Some(index) = self.notebook_index {
|
||||
let content_start_cell = index.cell(content_start_index.get()).unwrap_or_default();
|
||||
while start_index < content_start_index {
|
||||
if jupyter_index.cell(start_index.get()).unwrap_or_default() == content_start_cell {
|
||||
if index.cell(start_index.get()).unwrap_or_default() == content_start_cell {
|
||||
break;
|
||||
}
|
||||
start_index = start_index.saturating_add(1);
|
||||
|
@ -213,14 +211,12 @@ impl Display for MessageCodeFrame<'_> {
|
|||
.saturating_add(2)
|
||||
.min(OneIndexed::from_zero_indexed(source_code.line_count()));
|
||||
|
||||
// If we're working on a jupyter notebook, skip the lines which are
|
||||
// If we're working with a Jupyter Notebook, skip the lines which are
|
||||
// outside of the cell containing the diagnostic.
|
||||
if let Some(jupyter_index) = self.jupyter_index {
|
||||
let content_end_cell = jupyter_index
|
||||
.cell(content_end_index.get())
|
||||
.unwrap_or_default();
|
||||
if let Some(index) = self.notebook_index {
|
||||
let content_end_cell = index.cell(content_end_index.get()).unwrap_or_default();
|
||||
while end_index > content_end_index {
|
||||
if jupyter_index.cell(end_index.get()).unwrap_or_default() == content_end_cell {
|
||||
if index.cell(end_index.get()).unwrap_or_default() == content_end_cell {
|
||||
break;
|
||||
}
|
||||
end_index = end_index.saturating_sub(1);
|
||||
|
@ -256,10 +252,10 @@ impl Display for MessageCodeFrame<'_> {
|
|||
title: None,
|
||||
slices: vec![Slice {
|
||||
source: &source.text,
|
||||
line_start: self.jupyter_index.map_or_else(
|
||||
line_start: self.notebook_index.map_or_else(
|
||||
|| start_index.get(),
|
||||
|jupyter_index| {
|
||||
jupyter_index
|
||||
|notebook_index| {
|
||||
notebook_index
|
||||
.cell_row(start_index.get())
|
||||
.unwrap_or_default() as usize
|
||||
},
|
||||
|
|
|
@ -297,7 +297,7 @@ pub(crate) fn print_jupyter_messages(
|
|||
messages,
|
||||
&EmitterContext::new(&FxHashMap::from_iter([(
|
||||
path.file_name().unwrap().to_string_lossy().to_string(),
|
||||
notebook.clone(),
|
||||
notebook.index().clone(),
|
||||
)])),
|
||||
)
|
||||
.unwrap();
|
||||
|
|
|
@ -75,6 +75,7 @@ colored = { workspace = true, features = ["no-color"]}
|
|||
insta = { workspace = true, features = ["filters"] }
|
||||
insta-cmd = { version = "0.4.0" }
|
||||
tempfile = "3.6.0"
|
||||
test-case = { workspace = true }
|
||||
ureq = { version = "2.6.2", features = [] }
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies]
|
||||
|
|
|
@ -8,6 +8,7 @@ use std::sync::Mutex;
|
|||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use ruff::message::Message;
|
||||
|
@ -15,6 +16,7 @@ use ruff::settings::Settings;
|
|||
use ruff::warn_user;
|
||||
use ruff_cache::{CacheKey, CacheKeyHasher};
|
||||
use ruff_diagnostics::{DiagnosticKind, Fix};
|
||||
use ruff_notebook::NotebookIndex;
|
||||
use ruff_python_ast::imports::ImportMap;
|
||||
use ruff_source_file::SourceFileBuilder;
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
|
@ -193,6 +195,7 @@ impl Cache {
|
|||
key: T,
|
||||
messages: &[Message],
|
||||
imports: &ImportMap,
|
||||
notebook_index: Option<&NotebookIndex>,
|
||||
) {
|
||||
let source = if let Some(msg) = messages.first() {
|
||||
msg.file.source_text().to_owned()
|
||||
|
@ -226,6 +229,7 @@ impl Cache {
|
|||
imports: imports.clone(),
|
||||
messages,
|
||||
source,
|
||||
notebook_index: notebook_index.cloned(),
|
||||
};
|
||||
self.new_files.lock().unwrap().insert(path, file);
|
||||
}
|
||||
|
@ -263,6 +267,8 @@ pub(crate) struct FileCache {
|
|||
///
|
||||
/// This will be empty if `messages` is empty.
|
||||
source: String,
|
||||
/// Notebook index if this file is a Jupyter Notebook.
|
||||
notebook_index: Option<NotebookIndex>,
|
||||
}
|
||||
|
||||
impl FileCache {
|
||||
|
@ -283,7 +289,12 @@ impl FileCache {
|
|||
})
|
||||
.collect()
|
||||
};
|
||||
Diagnostics::new(messages, self.imports.clone())
|
||||
let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() {
|
||||
FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())])
|
||||
} else {
|
||||
FxHashMap::default()
|
||||
};
|
||||
Diagnostics::new(messages, self.imports.clone(), notebook_indexes)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -350,16 +361,19 @@ mod tests {
|
|||
use anyhow::Result;
|
||||
use ruff_python_ast::imports::ImportMap;
|
||||
|
||||
#[test]
|
||||
fn same_results() {
|
||||
use test_case::test_case;
|
||||
|
||||
#[test_case("../ruff/resources/test/fixtures", "ruff_tests/cache_same_results_ruff"; "ruff_fixtures")]
|
||||
#[test_case("../ruff_notebook/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_notebook"; "ruff_notebook_fixtures")]
|
||||
fn same_results(package_root: &str, cache_dir_path: &str) {
|
||||
let mut cache_dir = temp_dir();
|
||||
cache_dir.push("ruff_tests/cache_same_results");
|
||||
cache_dir.push(cache_dir_path);
|
||||
let _ = fs::remove_dir_all(&cache_dir);
|
||||
cache::init(&cache_dir).unwrap();
|
||||
|
||||
let settings = AllSettings::default();
|
||||
|
||||
let package_root = fs::canonicalize("../ruff/resources/test/fixtures").unwrap();
|
||||
let package_root = fs::canonicalize(package_root).unwrap();
|
||||
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
|
||||
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
|
||||
|
||||
|
@ -444,9 +458,6 @@ mod tests {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
// Not stored in the cache.
|
||||
expected_diagnostics.notebooks.clear();
|
||||
got_diagnostics.notebooks.clear();
|
||||
assert_eq!(expected_diagnostics, got_diagnostics);
|
||||
}
|
||||
|
||||
|
@ -614,6 +625,7 @@ mod tests {
|
|||
imports: ImportMap::new(),
|
||||
messages: Vec::new(),
|
||||
source: String::new(),
|
||||
notebook_index: None,
|
||||
},
|
||||
);
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ use itertools::Itertools;
|
|||
use log::{debug, error, warn};
|
||||
#[cfg(not(target_family = "wasm"))]
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use ruff::message::Message;
|
||||
use ruff::registry::Rule;
|
||||
|
@ -156,6 +157,7 @@ pub(crate) fn check(
|
|||
TextSize::default(),
|
||||
)],
|
||||
ImportMap::default(),
|
||||
FxHashMap::default(),
|
||||
)
|
||||
} else {
|
||||
warn!(
|
||||
|
|
|
@ -26,7 +26,7 @@ use ruff::source_kind::SourceKind;
|
|||
use ruff::{fs, IOError, SyntaxError};
|
||||
use ruff_diagnostics::Diagnostic;
|
||||
use ruff_macros::CacheKey;
|
||||
use ruff_notebook::{Cell, Notebook, NotebookError};
|
||||
use ruff_notebook::{Cell, Notebook, NotebookError, NotebookIndex};
|
||||
use ruff_python_ast::imports::ImportMap;
|
||||
use ruff_python_ast::{PySourceType, SourceType, TomlSourceType};
|
||||
use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder};
|
||||
|
@ -64,16 +64,20 @@ pub(crate) struct Diagnostics {
|
|||
pub(crate) messages: Vec<Message>,
|
||||
pub(crate) fixed: FxHashMap<String, FixTable>,
|
||||
pub(crate) imports: ImportMap,
|
||||
pub(crate) notebooks: FxHashMap<String, Notebook>,
|
||||
pub(crate) notebook_indexes: FxHashMap<String, NotebookIndex>,
|
||||
}
|
||||
|
||||
impl Diagnostics {
|
||||
pub(crate) fn new(messages: Vec<Message>, imports: ImportMap) -> Self {
|
||||
pub(crate) fn new(
|
||||
messages: Vec<Message>,
|
||||
imports: ImportMap,
|
||||
notebook_indexes: FxHashMap<String, NotebookIndex>,
|
||||
) -> Self {
|
||||
Self {
|
||||
messages,
|
||||
fixed: FxHashMap::default(),
|
||||
imports,
|
||||
notebooks: FxHashMap::default(),
|
||||
notebook_indexes,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,6 +98,7 @@ impl Diagnostics {
|
|||
TextSize::default(),
|
||||
)],
|
||||
ImportMap::default(),
|
||||
FxHashMap::default(),
|
||||
)
|
||||
} else {
|
||||
match path {
|
||||
|
@ -130,7 +135,7 @@ impl AddAssign for Diagnostics {
|
|||
}
|
||||
}
|
||||
}
|
||||
self.notebooks.extend(other.notebooks);
|
||||
self.notebook_indexes.extend(other.notebook_indexes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -341,7 +346,13 @@ pub(crate) fn lint_path(
|
|||
if let Some((cache, relative_path, key)) = caching {
|
||||
// We don't cache parsing errors.
|
||||
if parse_error.is_none() {
|
||||
cache.update(relative_path.to_owned(), key, &messages, &imports);
|
||||
cache.update(
|
||||
relative_path.to_owned(),
|
||||
key,
|
||||
&messages,
|
||||
&imports,
|
||||
source_kind.as_ipy_notebook().map(Notebook::index),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -359,12 +370,13 @@ pub(crate) fn lint_path(
|
|||
);
|
||||
}
|
||||
|
||||
let notebooks = if let SourceKind::IpyNotebook(notebook) = source_kind {
|
||||
let notebook_indexes = if let SourceKind::IpyNotebook(notebook) = source_kind {
|
||||
FxHashMap::from_iter([(
|
||||
path.to_str()
|
||||
.ok_or_else(|| anyhow!("Unable to parse filename: {:?}", path))?
|
||||
.to_string(),
|
||||
notebook,
|
||||
// Index needs to be computed always to store in cache.
|
||||
notebook.index().clone(),
|
||||
)])
|
||||
} else {
|
||||
FxHashMap::default()
|
||||
|
@ -374,7 +386,7 @@ pub(crate) fn lint_path(
|
|||
messages,
|
||||
fixed: FxHashMap::from_iter([(fs::relativize_path(path), fixed)]),
|
||||
imports,
|
||||
notebooks,
|
||||
notebook_indexes,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -498,7 +510,7 @@ pub(crate) fn lint_stdin(
|
|||
fixed,
|
||||
)]),
|
||||
imports,
|
||||
notebooks: FxHashMap::default(),
|
||||
notebook_indexes: FxHashMap::default(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -177,7 +177,7 @@ impl Printer {
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
let context = EmitterContext::new(&diagnostics.notebooks);
|
||||
let context = EmitterContext::new(&diagnostics.notebook_indexes);
|
||||
|
||||
match self.format {
|
||||
SerializationFormat::Json => {
|
||||
|
@ -364,7 +364,7 @@ impl Printer {
|
|||
writeln!(writer)?;
|
||||
}
|
||||
|
||||
let context = EmitterContext::new(&diagnostics.notebooks);
|
||||
let context = EmitterContext::new(&diagnostics.notebook_indexes);
|
||||
TextEmitter::default()
|
||||
.with_show_fix_status(show_fix_status(self.autofix_level))
|
||||
.with_show_source(self.flags.intersects(Flags::SHOW_SOURCE))
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Jupyter Notebook indexing table
|
||||
///
|
||||
/// When we lint a jupyter notebook, we have to translate the row/column based on
|
||||
/// [`ruff_text_size::TextSize`] to jupyter notebook cell/row/column.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NotebookIndex {
|
||||
/// Enter a row (1-based), get back the cell (1-based)
|
||||
pub(super) row_to_cell: Vec<u32>,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue