Add NotebookIndex to the cache (#6863)

## Summary

This PR updates the `FileCache` to include an optional `NotebookIndex`
to support caching for Jupyter Notebooks.

Only the index is required to compute the diagnostics, so there's no need to
store the entire `Notebook` on the `Diagnostics` struct. Storing just the index
in the cache is therefore enough to reconstruct the `Diagnostics`.
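
As a minimal, self-contained sketch of the idea (the `NotebookIndex` here is a
stand-in for the real type from the `ruff_notebook` crate, and the helper is
illustrative; the actual changes are in the diff below):

```rust
use std::collections::HashMap;

/// Stand-in for `ruff_notebook::NotebookIndex`, which maps locations in the
/// concatenated notebook source back to the original cells.
#[derive(Clone, Debug)]
struct NotebookIndex;

/// Simplified cache entry: only the index is stored, never the whole `Notebook`.
struct FileCache {
    notebook_index: Option<NotebookIndex>,
}

impl FileCache {
    /// Rebuild the per-file index map that `Diagnostics` carries, mirroring
    /// what `FileCache::as_diagnostics` does in the diff below.
    fn notebook_indexes(&self, path: &str) -> HashMap<String, NotebookIndex> {
        match &self.notebook_index {
            // A notebook file: map its path to the cached index.
            Some(index) => HashMap::from([(path.to_string(), index.clone())]),
            // A plain Python file: nothing to restore.
            None => HashMap::new(),
        }
    }
}

fn main() {
    let cached = FileCache {
        notebook_index: Some(NotebookIndex),
    };
    assert_eq!(cached.notebook_indexes("example.ipynb").len(), 1);
}
```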

## Test Plan

Update an existing test case to run over the fixtures under the
`ruff_notebook` crate, which contains multiple Jupyter Notebooks.
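
For reference, here is a minimal, self-contained sketch of how the `test-case`
crate parameterizes a single test over multiple fixture roots, mirroring the
`same_results` change in the diff below; the test body is only a placeholder:

```rust
#[cfg(test)]
mod tests {
    use test_case::test_case;

    // Each `test_case` attribute expands into its own named `#[test]`.
    #[test_case("../ruff/resources/test/fixtures"; "ruff_fixtures")]
    #[test_case("../ruff_notebook/resources/test/fixtures"; "ruff_notebook_fixtures")]
    fn same_results(package_root: &str) {
        // Placeholder assertion; the real test lints every fixture with and
        // without the cache and asserts the diagnostics are identical.
        assert!(!package_root.is_empty());
    }
}
```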

Locally, the following commands were run in order:
1. Remove the cache: `rm -rf .ruff_cache`
2. Run without cache: `cargo run --bin ruff -- check --isolated
crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb
--no-cache`
3. Run with cache: `cargo run --bin ruff -- check --isolated
crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb`
4. Verify that the `.ruff_cache` directory was created
5. Run with the cache again and verify the results match: `cargo run --bin ruff
-- check --isolated
crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb`

## Benchmarks

https://github.com/astral-sh/ruff/pull/6863#issuecomment-1715675186

fixes: #6671
Dhruv Manilawala 2023-09-12 18:29:03 +05:30 committed by GitHub
parent e7b7e4a18d
commit ee0f1270cf
11 changed files with 84 additions and 58 deletions


@@ -75,6 +75,7 @@ colored = { workspace = true, features = ["no-color"]}
insta = { workspace = true, features = ["filters"] }
insta-cmd = { version = "0.4.0" }
tempfile = "3.6.0"
test-case = { workspace = true }
ureq = { version = "2.6.2", features = [] }
[target.'cfg(target_os = "windows")'.dependencies]


@@ -8,6 +8,7 @@ use std::sync::Mutex;
use std::time::{Duration, SystemTime};
use anyhow::{Context, Result};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use ruff::message::Message;
@@ -15,6 +16,7 @@ use ruff::settings::Settings;
use ruff::warn_user;
use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_diagnostics::{DiagnosticKind, Fix};
use ruff_notebook::NotebookIndex;
use ruff_python_ast::imports::ImportMap;
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
@@ -193,6 +195,7 @@ impl Cache {
key: T,
messages: &[Message],
imports: &ImportMap,
notebook_index: Option<&NotebookIndex>,
) {
let source = if let Some(msg) = messages.first() {
msg.file.source_text().to_owned()
@@ -226,6 +229,7 @@ impl Cache {
imports: imports.clone(),
messages,
source,
notebook_index: notebook_index.cloned(),
};
self.new_files.lock().unwrap().insert(path, file);
}
@@ -263,6 +267,8 @@ pub(crate) struct FileCache {
///
/// This will be empty if `messages` is empty.
source: String,
/// Notebook index if this file is a Jupyter Notebook.
notebook_index: Option<NotebookIndex>,
}
impl FileCache {
@@ -283,7 +289,12 @@ impl FileCache {
})
.collect()
};
Diagnostics::new(messages, self.imports.clone())
let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() {
FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())])
} else {
FxHashMap::default()
};
Diagnostics::new(messages, self.imports.clone(), notebook_indexes)
}
}
@@ -350,16 +361,19 @@ mod tests {
use anyhow::Result;
use ruff_python_ast::imports::ImportMap;
#[test]
fn same_results() {
use test_case::test_case;
#[test_case("../ruff/resources/test/fixtures", "ruff_tests/cache_same_results_ruff"; "ruff_fixtures")]
#[test_case("../ruff_notebook/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_notebook"; "ruff_notebook_fixtures")]
fn same_results(package_root: &str, cache_dir_path: &str) {
let mut cache_dir = temp_dir();
cache_dir.push("ruff_tests/cache_same_results");
cache_dir.push(cache_dir_path);
let _ = fs::remove_dir_all(&cache_dir);
cache::init(&cache_dir).unwrap();
let settings = AllSettings::default();
let package_root = fs::canonicalize("../ruff/resources/test/fixtures").unwrap();
let package_root = fs::canonicalize(package_root).unwrap();
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
@@ -444,9 +458,6 @@ mod tests {
.unwrap();
}
// Not stored in the cache.
expected_diagnostics.notebooks.clear();
got_diagnostics.notebooks.clear();
assert_eq!(expected_diagnostics, got_diagnostics);
}
@@ -614,6 +625,7 @@ mod tests {
imports: ImportMap::new(),
messages: Vec::new(),
source: String::new(),
notebook_index: None,
},
);


@@ -11,6 +11,7 @@ use itertools::Itertools;
use log::{debug, error, warn};
#[cfg(not(target_family = "wasm"))]
use rayon::prelude::*;
use rustc_hash::FxHashMap;
use ruff::message::Message;
use ruff::registry::Rule;
@@ -156,6 +157,7 @@ pub(crate) fn check(
TextSize::default(),
)],
ImportMap::default(),
FxHashMap::default(),
)
} else {
warn!(


@@ -26,7 +26,7 @@ use ruff::source_kind::SourceKind;
use ruff::{fs, IOError, SyntaxError};
use ruff_diagnostics::Diagnostic;
use ruff_macros::CacheKey;
use ruff_notebook::{Cell, Notebook, NotebookError};
use ruff_notebook::{Cell, Notebook, NotebookError, NotebookIndex};
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::{PySourceType, SourceType, TomlSourceType};
use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder};
@@ -64,16 +64,20 @@ pub(crate) struct Diagnostics {
pub(crate) messages: Vec<Message>,
pub(crate) fixed: FxHashMap<String, FixTable>,
pub(crate) imports: ImportMap,
pub(crate) notebooks: FxHashMap<String, Notebook>,
pub(crate) notebook_indexes: FxHashMap<String, NotebookIndex>,
}
impl Diagnostics {
pub(crate) fn new(messages: Vec<Message>, imports: ImportMap) -> Self {
pub(crate) fn new(
messages: Vec<Message>,
imports: ImportMap,
notebook_indexes: FxHashMap<String, NotebookIndex>,
) -> Self {
Self {
messages,
fixed: FxHashMap::default(),
imports,
notebooks: FxHashMap::default(),
notebook_indexes,
}
}
@@ -94,6 +98,7 @@ impl Diagnostics {
TextSize::default(),
)],
ImportMap::default(),
FxHashMap::default(),
)
} else {
match path {
@@ -130,7 +135,7 @@ impl AddAssign for Diagnostics {
}
}
}
self.notebooks.extend(other.notebooks);
self.notebook_indexes.extend(other.notebook_indexes);
}
}
@@ -341,7 +346,13 @@ pub(crate) fn lint_path(
if let Some((cache, relative_path, key)) = caching {
// We don't cache parsing errors.
if parse_error.is_none() {
cache.update(relative_path.to_owned(), key, &messages, &imports);
cache.update(
relative_path.to_owned(),
key,
&messages,
&imports,
source_kind.as_ipy_notebook().map(Notebook::index),
);
}
}
@@ -359,12 +370,13 @@ pub(crate) fn lint_path(
);
}
let notebooks = if let SourceKind::IpyNotebook(notebook) = source_kind {
let notebook_indexes = if let SourceKind::IpyNotebook(notebook) = source_kind {
FxHashMap::from_iter([(
path.to_str()
.ok_or_else(|| anyhow!("Unable to parse filename: {:?}", path))?
.to_string(),
notebook,
// Index needs to be computed always to store in cache.
notebook.index().clone(),
)])
} else {
FxHashMap::default()
@@ -374,7 +386,7 @@ pub(crate) fn lint_path(
messages,
fixed: FxHashMap::from_iter([(fs::relativize_path(path), fixed)]),
imports,
notebooks,
notebook_indexes,
})
}
@@ -498,7 +510,7 @@ pub(crate) fn lint_stdin(
fixed,
)]),
imports,
notebooks: FxHashMap::default(),
notebook_indexes: FxHashMap::default(),
})
}


@@ -177,7 +177,7 @@ impl Printer {
return Ok(());
}
let context = EmitterContext::new(&diagnostics.notebooks);
let context = EmitterContext::new(&diagnostics.notebook_indexes);
match self.format {
SerializationFormat::Json => {
@@ -364,7 +364,7 @@ impl Printer {
writeln!(writer)?;
}
let context = EmitterContext::new(&diagnostics.notebooks);
let context = EmitterContext::new(&diagnostics.notebook_indexes);
TextEmitter::default()
.with_show_fix_status(show_fix_status(self.autofix_level))
.with_show_source(self.flags.intersects(Flags::SHOW_SOURCE))