Include file permissions in key for cached files (#5901)

Reimplements https://github.com/astral-sh/ruff/pull/3104
Closes https://github.com/astral-sh/ruff/issues/5726

Note that we will generate the hash for a cache key twice in normal
operation. Once to check for the cached item and again to update the
cache. We could optimize this by generating the hash once in
`diagnostics::lint_file` and passing the `u64` into `get` and `update`.
We'd probably want to wrap it in a `CacheKeyHash` enum for type safety.

## Test plan

Unit tests for Windows and Unix.

Manual test with case from issue

```
❯ touch fake.py
❯ chmod +x fake.py
❯ ./target/debug/ruff --select EXE fake.py
fake.py:1:1: EXE002 The file is executable but no shebang is present
Found 1 error.
❯ chmod -x fake.py
❯ ./target/debug/ruff --select EXE fake.py
```
This commit is contained in:
Zanie Blue 2023-07-25 12:06:47 -05:00 committed by GitHub
parent cbf6085375
commit 3000a47fe8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 284 additions and 122 deletions

1
Cargo.lock generated
View file

@ -1995,6 +1995,7 @@ dependencies = [
"ruff",
"ruff_cache",
"ruff_diagnostics",
"ruff_macros",
"ruff_python_ast",
"ruff_python_formatter",
"ruff_python_stdlib",

View file

@ -30,6 +30,7 @@ ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_formatter = { path = "../ruff_python_formatter" }
ruff_text_size = { workspace = true }
ruff_textwrap = { path = "../ruff_textwrap" }
ruff_macros = { path = "../ruff_macros" }
annotate-snippets = { version = "0.9.1", features = ["color"] }
anyhow = { workspace = true }

View file

@ -1,4 +1,3 @@
# NOTE: sync with cache::invalidation test
a = 1
__all__ = list(["a", "b"])

View file

@ -164,21 +164,20 @@ impl Cache {
path.strip_prefix(&self.package.package_root).ok()
}
/// Get the cached results for a single file at relative `path`. This uses
/// `file_last_modified` to determine if the results are still accurate
/// Get the cached results for a single file at relative `path`. This
/// uses `key` to determine if the results are still accurate.
/// (i.e. if the file hasn't been modified since the cached run).
///
/// This returns `None` if `file_last_modified` differs from the cached
/// timestamp or if the cache doesn't contain results for the file.
pub(crate) fn get(
&self,
path: &RelativePath,
file_last_modified: SystemTime,
) -> Option<&FileCache> {
/// This returns `None` if `key` differs from the cached key or if the
/// cache doesn't contain results for the file.
pub(crate) fn get<T: CacheKey>(&self, path: &RelativePath, key: &T) -> Option<&FileCache> {
let file = self.package.files.get(path)?;
let mut hasher = CacheKeyHasher::new();
key.cache_key(&mut hasher);
// Make sure the file hasn't changed since the cached run.
if file.last_modified != file_last_modified {
if file.key != hasher.finish() {
return None;
}
@ -188,10 +187,10 @@ impl Cache {
}
/// Add or update a file cache at `path` relative to the package root.
pub(crate) fn update(
pub(crate) fn update<T: CacheKey>(
&self,
path: RelativePathBuf,
last_modified: SystemTime,
key: T,
messages: &[Message],
imports: &ImportMap,
) {
@ -218,8 +217,11 @@ impl Cache {
})
.collect();
let mut hasher = CacheKeyHasher::new();
key.cache_key(&mut hasher);
let file = FileCache {
last_modified,
key: hasher.finish(),
last_seen: AtomicU64::new(self.last_seen_cache),
imports: imports.clone(),
messages,
@ -244,8 +246,8 @@ struct PackageCache {
/// On disk representation of the cache per source file.
#[derive(Deserialize, Debug, Serialize)]
pub(crate) struct FileCache {
/// Timestamp when the file was last modified before the (cached) check.
last_modified: SystemTime,
/// Key that determines if the cached item is still valid.
key: u64,
/// Timestamp when we last linted this file.
///
/// Represented as the number of milliseconds since Unix epoch. This will
@ -327,20 +329,27 @@ pub(crate) fn init(path: &Path) -> Result<()> {
#[cfg(test)]
mod tests {
use filetime::{set_file_mtime, FileTime};
use std::env::temp_dir;
use std::fs;
use std::io::{self, Write};
use std::io;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicU64;
use std::time::SystemTime;
use itertools::Itertools;
use ruff::settings::{flags, AllSettings};
use ruff_cache::CACHE_DIR_NAME;
use ruff_python_ast::imports::ImportMap;
use crate::cache::RelativePathBuf;
use crate::cache::{self, Cache, FileCache};
use crate::diagnostics::{lint_path, Diagnostics};
use std::sync::atomic::AtomicU64;
use anyhow::Result;
use ruff_python_ast::imports::ImportMap;
#[test]
fn same_results() {
let mut cache_dir = temp_dir();
@ -442,52 +451,99 @@ mod tests {
}
#[test]
fn invalidation() {
// NOTE: keep in sync with actual file.
const SOURCE: &[u8] = b"# NOTE: sync with cache::invalidation test\na = 1\n\n__all__ = list([\"a\", \"b\"])\n";
fn cache_adds_file_on_lint() {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let mut cache_dir = temp_dir();
cache_dir.push("ruff_tests/cache_invalidation");
let _ = fs::remove_dir_all(&cache_dir);
cache::init(&cache_dir).unwrap();
let settings = AllSettings::default();
let package_root = fs::canonicalize("resources/test/fixtures/cache_mutable").unwrap();
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
let test_cache = TestCache::new("cache_adds_file_on_lint");
let cache = test_cache.open();
test_cache.write_source_file("source.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
let path = package_root.join("source.py");
let mut expected_diagnostics = lint_path(
&path,
Some(&package_root),
&settings,
Some(&cache),
flags::Noqa::Enabled,
flags::FixMode::Generate,
)
.unwrap();
assert_eq!(cache.new_files.lock().unwrap().len(), 1);
cache.store().unwrap();
let cache = test_cache.open();
test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
1,
"A single new file should be added to the cache"
);
cache.store().unwrap();
}
let tests = [
// File change.
(|path| {
let mut file = fs::OpenOptions::new()
.write(true)
.truncate(true)
.open(path)?;
file.write_all(SOURCE)?;
file.sync_data()?;
Ok(|_| Ok(()))
}) as fn(&Path) -> io::Result<fn(&Path) -> io::Result<()>>,
// Regression for issue #3086.
#[cfg(unix)]
|path| {
flip_execute_permission_bit(path)?;
Ok(flip_execute_permission_bit)
},
];
#[test]
fn cache_adds_files_on_lint() {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let test_cache = TestCache::new("cache_adds_files_on_lint");
let cache = test_cache.open();
test_cache.write_source_file("source_1.py", source);
test_cache.write_source_file("source_2.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
cache.store().unwrap();
let cache = test_cache.open();
test_cache
.lint_file_with_cache("source_1.py", &cache)
.expect("Failed to lint test file");
test_cache
.lint_file_with_cache("source_2.py", &cache)
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
2,
"Both files should be added to the cache"
);
cache.store().unwrap();
}
#[test]
fn cache_invalidated_on_file_modified_time() {
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let test_cache = TestCache::new("cache_invalidated_on_file_modified_time");
let cache = test_cache.open();
let source_path = test_cache.write_source_file("source.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
let expected_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
cache.store().unwrap();
let cache = test_cache.open();
// Update the modified time of the file to a time in the future
set_file_mtime(
source_path,
FileTime::from_system_time(SystemTime::now() + std::time::Duration::from_secs(1)),
)
.unwrap();
let got_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.expect("Failed to lint test file");
assert_eq!(
cache.new_files.lock().unwrap().len(),
1,
"Cache should not be used, the file should be treated as new and added to the cache"
);
assert_eq!(
expected_diagnostics, got_diagnostics,
"The diagnostics should not change"
);
}
#[test]
fn cache_invalidated_on_permission_change() {
// Regression test for issue #3086.
#[cfg(unix)]
#[allow(clippy::items_after_statements)]
@ -498,55 +554,62 @@ mod tests {
file.set_permissions(PermissionsExt::from_mode(perms.mode() ^ 0o111))
}
for change_file in tests {
let cleanup = change_file(&path).unwrap();
#[cfg(windows)]
#[allow(clippy::items_after_statements)]
fn flip_read_only_permission(path: &Path) -> io::Result<()> {
let file = fs::OpenOptions::new().write(true).open(path)?;
let mut perms = file.metadata()?.permissions();
perms.set_readonly(!perms.readonly());
file.set_permissions(perms)
}
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
let mut got_diagnostics = lint_path(
&path,
Some(&package_root),
&settings,
Some(&cache),
flags::Noqa::Enabled,
flags::FixMode::Generate,
)
let test_cache = TestCache::new("cache_invalidated_on_permission_change");
let cache = test_cache.open();
let path = test_cache.write_source_file("source.py", source);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
let expected_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.unwrap();
cleanup(&path).unwrap();
cache.store().unwrap();
let cache = test_cache.open();
assert_eq!(
cache.new_files.lock().unwrap().len(),
1,
"cache must not be used"
);
// Flip the permissions on the file
#[cfg(unix)]
flip_execute_permission_bit(&path).unwrap();
#[cfg(windows)]
flip_read_only_permission(&path).unwrap();
// Not store in the cache.
expected_diagnostics.source_kind.clear();
got_diagnostics.source_kind.clear();
assert!(expected_diagnostics == got_diagnostics);
}
let got_diagnostics = test_cache
.lint_file_with_cache("source.py", &cache)
.unwrap();
assert_eq!(
cache.new_files.lock().unwrap().len(),
1,
"Cache should not be used, the file should be treated as new and added to the cache"
);
assert_eq!(
expected_diagnostics, got_diagnostics,
"The diagnostics should not change"
);
}
#[test]
fn remove_old_files() {
let mut cache_dir = temp_dir();
cache_dir.push("ruff_tests/cache_remove_old_files");
let _ = fs::remove_dir_all(&cache_dir);
cache::init(&cache_dir).unwrap();
fn cache_removes_stale_files_on_store() {
let test_cache = TestCache::new("cache_removes_stale_files_on_store");
let mut cache = test_cache.open();
let settings = AllSettings::default();
let package_root =
fs::canonicalize("resources/test/fixtures/cache_remove_old_files").unwrap();
let mut cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
// Add a file to the cache that hasn't been linted or seen since the
// '70s!
// Add a file to the cache that hasn't been linted or seen since the '70s!
let old_path_key = RelativePathBuf::from("old.py");
cache.package.files.insert(
PathBuf::from("old.py"),
old_path_key,
FileCache {
last_modified: SystemTime::UNIX_EPOCH,
key: 123,
last_seen: AtomicU64::new(123),
imports: ImportMap::new(),
messages: Vec::new(),
@ -555,26 +618,97 @@ mod tests {
);
// Now actually lint a file.
let path = package_root.join("source.py");
lint_path(
&path,
Some(&package_root),
&settings,
Some(&cache),
flags::Noqa::Enabled,
flags::FixMode::Generate,
)
.unwrap();
let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n";
test_cache.write_source_file("new.py", source);
let new_path_key = RelativePathBuf::from("new.py");
assert_eq!(cache.new_files.lock().unwrap().len(), 0);
test_cache
.lint_file_with_cache("new.py", &cache)
.expect("Failed to lint test file");
// Storing the cache should remove the old (`old.py`) file.
cache.store().unwrap();
// So we when we open the cache again it shouldn't contain `old.py`.
let cache = Cache::open(&cache_dir, package_root, &settings.lib);
let cache = test_cache.open();
assert_eq!(cache.package.files.len(), 1, "didn't remove the old file");
assert!(
!cache.package.files.contains_key(&path),
"removed the wrong file"
cache.package.files.keys().collect_vec() == vec![&new_path_key],
"Only the new file should be present"
);
}
struct TestCache {
cache_dir: PathBuf,
package_root: PathBuf,
settings: AllSettings,
}
impl TestCache {
fn new(name: &str) -> Self {
// Build a new cache directory and clear it
let mut test_dir = temp_dir();
test_dir.push("ruff_tests/cache");
test_dir.push(name);
let _ = fs::remove_dir_all(&test_dir);
// Create separate directories for the cache and the test package
let cache_dir = test_dir.join("cache");
let package_root = test_dir.join("package");
cache::init(&cache_dir).unwrap();
fs::create_dir(package_root.clone()).unwrap();
let settings = AllSettings::default();
Self {
cache_dir,
package_root,
settings,
}
}
fn write_source_file(&self, path: &str, contents: &[u8]) -> PathBuf {
let path = self.package_root.join(path);
let mut file = fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(&*path)
.unwrap();
file.write_all(contents).unwrap();
file.sync_data().unwrap();
path
}
fn open(&self) -> Cache {
Cache::open(
&self.cache_dir,
self.package_root.clone(),
&self.settings.lib,
)
}
fn lint_file_with_cache(
&self,
path: &str,
cache: &Cache,
) -> Result<Diagnostics, anyhow::Error> {
lint_path(
&self.package_root.join(path),
Some(&self.package_root),
&self.settings,
Some(cache),
flags::Noqa::Enabled,
flags::FixMode::Generate,
)
}
}
impl Drop for TestCache {
fn drop(&mut self) {
let _ = fs::remove_dir_all(&self.cache_dir);
}
}
}

View file

@ -6,13 +6,17 @@ use std::io::Write;
use std::ops::AddAssign;
use std::path::Path;
use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use colored::Colorize;
use filetime::FileTime;
use log::{debug, error, warn};
use ruff_text_size::{TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use crate::cache::Cache;
use ruff::jupyter::Notebook;
use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff::logging::DisplayParseError;
@ -23,11 +27,35 @@ use ruff::settings::{flags, AllSettings, Settings};
use ruff::source_kind::SourceKind;
use ruff::{fs, IOError};
use ruff_diagnostics::Diagnostic;
use ruff_macros::CacheKey;
use ruff_python_ast::imports::ImportMap;
use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml};
use crate::cache::Cache;
#[derive(CacheKey)]
pub(crate) struct FileCacheKey {
/// Timestamp when the file was last modified before the (cached) check.
file_last_modified: FileTime,
/// Permissions of the file before the (cached) check.
file_permissions_mode: u32,
}
impl FileCacheKey {
fn from_path(path: &Path) -> io::Result<FileCacheKey> {
// Construct a cache key for the file
let metadata = path.metadata()?;
#[cfg(unix)]
let permissions = metadata.permissions().mode();
#[cfg(windows)]
let permissions: u32 = metadata.permissions().readonly().into();
Ok(FileCacheKey {
file_last_modified: FileTime::from_last_modification_time(&metadata),
file_permissions_mode: permissions,
})
}
}
#[derive(Debug, Default, PartialEq)]
pub(crate) struct Diagnostics {
@ -117,12 +145,16 @@ pub(crate) fn lint_path(
let relative_path = cache
.relative_path(path)
.expect("wrong package cache for file");
let last_modified = path.metadata()?.modified()?;
if let Some(cache) = cache.get(relative_path, last_modified) {
let cache_key = FileCacheKey::from_path(path).context("Failed to create cache key")?;
if let Some(cache) = cache.get(relative_path, &cache_key) {
return Ok(cache.as_diagnostics(path));
}
Some((cache, relative_path, last_modified))
// Stash the file metadata for later so when we update the cache it reflects the prerun
// information
Some((cache, relative_path, cache_key))
}
_ => None,
};
@ -269,15 +301,10 @@ pub(crate) fn lint_path(
let imports = imports.unwrap_or_default();
if let Some((cache, relative_path, file_last_modified)) = caching {
if let Some((cache, relative_path, key)) = caching {
// We don't cache parsing errors.
if parse_error.is_none() {
cache.update(
relative_path.to_owned(),
file_last_modified,
&messages,
&imports,
);
cache.update(relative_path.to_owned(), key, &messages, &imports);
}
}