mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 18:58:26 +00:00
refactor: Introduce CacheKey
trait (#3323)
This PR introduces a new `CacheKey` trait for types that can be used as a cache key. I'm not entirely sure if this is worth the "overhead", but I was surprised to find `HashableHashSet` and got scared when I looked at the time complexity of the `hash` function. These implementations must be extremely slow in hashed collections. I then searched for usages and quickly realized that only the cache uses these `Hash` implementations, where performance is less sensitive. This PR introduces a new `CacheKey` trait to communicate the difference between a hash and computing a key for the cache. The new trait can be implemented for types that don't implement `Hash` for performance reasons, and we can define additional constraints on the implementation: For example, we'll want to enforce portability when we add remote caching support. Using a different trait further allows us not to implement it for types without stable identities (e.g. pointers) or use other implementations than the standard hash function.
This commit is contained in:
parent
d1288dc2b1
commit
cdbe2ee496
53 changed files with 842 additions and 331 deletions
15
crates/ruff_cache/Cargo.toml
Normal file
15
crates/ruff_cache/Cargo.toml
Normal file
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "ruff_cache"
|
||||
version = "0.0.0"
|
||||
publish = false
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
itertools = { workspace = true }
|
||||
globset = { version = "0.4.9" }
|
||||
regex = { workspace = true }
|
||||
filetime = { version = "0.2.17" }
|
||||
|
||||
[dev-dependencies]
|
||||
ruff_macros = { path = "../ruff_macros" }
|
376
crates/ruff_cache/src/cache_key.rs
Normal file
376
crates/ruff_cache/src/cache_key.rs
Normal file
|
@ -0,0 +1,376 @@
|
|||
use itertools::Itertools;
|
||||
use regex::Regex;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Hasher state that `CacheKey` implementations write their data into.
///
/// Wraps a [`DefaultHasher`] and dereferences to it (see the `Deref` and
/// `DerefMut` impls below), so the wrapped hasher's methods can be reached
/// through a `CacheKeyHasher`.
#[derive(Clone, Debug, Default)]
pub struct CacheKeyHasher {
    inner: DefaultHasher,
}

impl CacheKeyHasher {
    /// Creates a hasher wrapping a freshly constructed [`DefaultHasher`].
    pub fn new() -> Self {
        let inner = DefaultHasher::new();
        Self { inner }
    }
}

impl Deref for CacheKeyHasher {
    type Target = DefaultHasher;

    /// Borrows the wrapped hasher.
    fn deref(&self) -> &DefaultHasher {
        let Self { inner } = self;
        inner
    }
}

impl DerefMut for CacheKeyHasher {
    /// Mutably borrows the wrapped hasher.
    fn deref_mut(&mut self) -> &mut DefaultHasher {
        let Self { inner } = self;
        inner
    }
}
|
||||
|
||||
/// A type that be used as part of a cache key.
|
||||
///
|
||||
/// A cache looks up artefacts by a cache key. Many cache keys are composed of sub-keys. For example,
|
||||
/// caching the lint results of a file depend at least on the file content, the user settings, and linter version.
|
||||
/// Types implementing the [`CacheKey`] trait can be used as part of a cache key by which artefacts are queried.
|
||||
///
|
||||
/// ## Implementing `CacheKey`
|
||||
///
|
||||
/// You can derive [`CacheKey`] with `#[derive(CacheKey)]` if all fields implement [`CacheKey`]. The resulting
|
||||
/// cache key will be the combination of the values from calling `cache_key` on each field.
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_macros::CacheKey;
|
||||
///
|
||||
/// #[derive(CacheKey)]
|
||||
/// struct Test {
|
||||
/// name: String,
|
||||
/// version: u32,
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// If you need more control over computing the cache key, you can of course implement the [`CacheKey]` yourself:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_cache::{CacheKey, CacheKeyHasher};
|
||||
///
|
||||
/// struct Test {
|
||||
/// name: String,
|
||||
/// version: u32,
|
||||
/// other: String
|
||||
/// }
|
||||
///
|
||||
/// impl CacheKey for Test {
|
||||
/// fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
/// self.name.cache_key(state);
|
||||
/// self.version.cache_key(state);
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// ## Portability
|
||||
///
|
||||
/// Ideally, the cache key is portable across platforms but this is not yet a strict requirement.
|
||||
///
|
||||
/// ## Using [`Hash`]
|
||||
///
|
||||
/// You can defer to the [`Hash`] implementation for non-composite types.
|
||||
/// Be aware, that the [`Hash`] implementation may not be portable.
|
||||
///
|
||||
/// ## Why a new trait rather than reusing [`Hash`]?
|
||||
/// The main reason is that hashes and cache keys have different constraints:
|
||||
///
|
||||
/// * Cache keys are less performance sensitive: Hashes must be super fast to compute for performant hashed-collections. That's
|
||||
/// why some standard types don't implement [`Hash`] where it would be safe to to implement [`CacheKey`], e.g. `HashSet`
|
||||
/// * Cache keys must be deterministic where hash keys do not have this constraint. That's why pointers don't implement [`CacheKey`] but they implement [`Hash`].
|
||||
/// * Ideally, cache keys are portable
|
||||
///
|
||||
/// [`Hash`](std::hash::Hash)
|
||||
pub trait CacheKey {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher);
|
||||
|
||||
fn cache_key_slice(data: &[Self], state: &mut CacheKeyHasher)
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
for piece in data {
|
||||
piece.cache_key(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for bool {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_u8(u8::from(*self));
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for char {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_u32(*self as u32);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for usize {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_usize(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for u128 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_u128(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for u64 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_u64(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for u32 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_u32(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for u16 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_u16(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for u8 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_u8(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for isize {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_isize(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for i128 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_i128(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for i64 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_i64(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for i32 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_i32(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for i16 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_i16(*self);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for i8 {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_i8(*self);
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements `CacheKey` for the unit type (no arguments) or for the tuple
/// whose element types are named by the given identifiers.
macro_rules! impl_cache_key_tuple {
    // The unit type has no elements, so nothing is written.
    () => {
        impl CacheKey for () {
            #[inline]
            fn cache_key(&self, _state: &mut CacheKeyHasher) {}
        }
    };

    // Non-empty tuples: write each element in declaration order.
    ($($elem:ident)+) => {
        impl<$($elem: CacheKey),+> CacheKey for ($($elem,)+)
        where
            last_type!($($elem,)+): ?Sized,
        {
            // The type identifiers are reused as the names of the destructured bindings.
            #[allow(non_snake_case)]
            #[inline]
            fn cache_key(&self, state: &mut CacheKeyHasher) {
                let ($(ref $elem,)+) = *self;
                $($elem.cache_key(state);)+
            }
        }
    };
}
|
||||
|
||||
/// Expands to the last identifier of a comma-terminated identifier list.
macro_rules! last_type {
    // One identifier left: that is the result.
    ($last:ident,) => {
        $last
    };
    // Otherwise drop the first identifier and recurse on the remainder.
    ($head:ident, $($tail:ident,)+) => {
        last_type!($($tail,)+)
    };
}
|
||||
|
||||
impl_cache_key_tuple! {}
|
||||
impl_cache_key_tuple! { T }
|
||||
impl_cache_key_tuple! { T B }
|
||||
impl_cache_key_tuple! { T B C }
|
||||
impl_cache_key_tuple! { T B C D }
|
||||
impl_cache_key_tuple! { T B C D E }
|
||||
impl_cache_key_tuple! { T B C D E F }
|
||||
impl_cache_key_tuple! { T B C D E F G }
|
||||
impl_cache_key_tuple! { T B C D E F G H }
|
||||
impl_cache_key_tuple! { T B C D E F G H I }
|
||||
impl_cache_key_tuple! { T B C D E F G H I J }
|
||||
impl_cache_key_tuple! { T B C D E F G H I J K }
|
||||
impl_cache_key_tuple! { T B C D E F G H I J K L }
|
||||
|
||||
impl CacheKey for str {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.hash(&mut **state);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for String {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.hash(&mut **state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: CacheKey> CacheKey for Option<T> {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
match self {
|
||||
None => state.write_usize(0),
|
||||
Some(value) => {
|
||||
state.write_usize(1);
|
||||
value.cache_key(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: CacheKey> CacheKey for [T] {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_usize(self.len());
|
||||
CacheKey::cache_key_slice(self, state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: ?Sized + CacheKey> CacheKey for &T {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
(**self).cache_key(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: ?Sized + CacheKey> CacheKey for &mut T {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
(**self).cache_key(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> CacheKey for Vec<T>
|
||||
where
|
||||
T: CacheKey,
|
||||
{
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_usize(self.len());
|
||||
CacheKey::cache_key_slice(self, state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V, S> CacheKey for HashMap<K, V, S>
|
||||
where
|
||||
K: CacheKey + Ord,
|
||||
V: CacheKey,
|
||||
{
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_usize(self.len());
|
||||
for (key, value) in self
|
||||
.iter()
|
||||
.sorted_by(|(left, _), (right, _)| left.cmp(right))
|
||||
{
|
||||
key.cache_key(state);
|
||||
value.cache_key(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<V: CacheKey + Ord, S> CacheKey for HashSet<V, S> {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_usize(self.len());
|
||||
for value in self.iter().sorted() {
|
||||
value.cache_key(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<V: CacheKey> CacheKey for BTreeSet<V> {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_usize(self.len());
|
||||
for item in self {
|
||||
item.cache_key(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<K: CacheKey + Ord, V: CacheKey> CacheKey for BTreeMap<K, V> {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
state.write_usize(self.len());
|
||||
|
||||
for (key, value) in self {
|
||||
key.cache_key(state);
|
||||
value.cache_key(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for Path {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.hash(&mut **state);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for PathBuf {
|
||||
#[inline]
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.hash(&mut **state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<V: ?Sized> CacheKey for Cow<'_, V>
|
||||
where
|
||||
V: CacheKey + ToOwned,
|
||||
{
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
(**self).cache_key(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for Regex {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.as_str().cache_key(state);
|
||||
}
|
||||
}
|
9
crates/ruff_cache/src/filetime.rs
Normal file
9
crates/ruff_cache/src/filetime.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
use crate::{CacheKey, CacheKeyHasher};
|
||||
use filetime::FileTime;
|
||||
use std::hash::Hash;
|
||||
|
||||
impl CacheKey for FileTime {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.hash(&mut **state);
|
||||
}
|
||||
}
|
14
crates/ruff_cache/src/globset.rs
Normal file
14
crates/ruff_cache/src/globset.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
use crate::{CacheKey, CacheKeyHasher};
|
||||
use globset::{Glob, GlobMatcher};
|
||||
|
||||
impl CacheKey for GlobMatcher {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.glob().cache_key(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl CacheKey for Glob {
|
||||
fn cache_key(&self, state: &mut CacheKeyHasher) {
|
||||
self.glob().cache_key(state);
|
||||
}
|
||||
}
|
15
crates/ruff_cache/src/lib.rs
Normal file
15
crates/ruff_cache/src/lib.rs
Normal file
|
@ -0,0 +1,15 @@
|
|||
mod cache_key;
|
||||
pub mod filetime;
|
||||
pub mod globset;
|
||||
|
||||
pub use cache_key::{CacheKey, CacheKeyHasher};
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Directory name that [`cache_dir`] appends to a project root.
pub const CACHE_DIR_NAME: &str = ".ruff_cache";

/// Return the default cache directory for a given project root: the
/// [`CACHE_DIR_NAME`] entry directly inside `project_root`.
///
/// This function performs a pure path join. It does not read the
/// `RUFF_CACHE_DIR` environment variable (or any other configuration), so
/// callers that support such an override must apply it themselves.
pub fn cache_dir(project_root: &Path) -> PathBuf {
    project_root.join(CACHE_DIR_NAME)
}
|
108
crates/ruff_cache/tests/cache_key.rs
Normal file
108
crates/ruff_cache/tests/cache_key.rs
Normal file
|
@ -0,0 +1,108 @@
|
|||
use ruff_cache::{CacheKey, CacheKeyHasher};
|
||||
use ruff_macros::CacheKey;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
#[derive(CacheKey, Hash)]
|
||||
struct UnitStruct;
|
||||
|
||||
#[derive(CacheKey, Hash)]
|
||||
struct NamedFieldsStruct {
|
||||
a: String,
|
||||
b: String,
|
||||
}
|
||||
|
||||
#[derive(CacheKey, Hash)]
|
||||
struct UnnamedFieldsStruct(String, String);
|
||||
|
||||
#[derive(CacheKey, Hash)]
|
||||
enum Enum {
|
||||
Unit,
|
||||
UnnamedFields(String, String),
|
||||
NamedFields { a: String, b: String },
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unit_struct_cache_key() {
|
||||
let mut key = CacheKeyHasher::new();
|
||||
|
||||
UnitStruct.cache_key(&mut key);
|
||||
|
||||
let mut hash = DefaultHasher::new();
|
||||
UnitStruct.hash(&mut hash);
|
||||
|
||||
assert_eq!(hash.finish(), key.finish());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn named_field_struct() {
|
||||
let mut key = CacheKeyHasher::new();
|
||||
|
||||
let named_fields = NamedFieldsStruct {
|
||||
a: "Hello".into(),
|
||||
b: "World".into(),
|
||||
};
|
||||
|
||||
named_fields.cache_key(&mut key);
|
||||
|
||||
let mut hash = DefaultHasher::new();
|
||||
named_fields.hash(&mut hash);
|
||||
|
||||
assert_eq!(hash.finish(), key.finish());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unnamed_field_struct() {
|
||||
let mut key = CacheKeyHasher::new();
|
||||
|
||||
let unnamed_fields = UnnamedFieldsStruct("Hello".into(), "World".into());
|
||||
|
||||
unnamed_fields.cache_key(&mut key);
|
||||
|
||||
let mut hash = DefaultHasher::new();
|
||||
unnamed_fields.hash(&mut hash);
|
||||
|
||||
assert_eq!(hash.finish(), key.finish());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn enum_unit_variant() {
|
||||
let mut key = CacheKeyHasher::new();
|
||||
|
||||
let variant = Enum::Unit;
|
||||
variant.cache_key(&mut key);
|
||||
|
||||
let mut hash = DefaultHasher::new();
|
||||
variant.hash(&mut hash);
|
||||
|
||||
assert_eq!(hash.finish(), key.finish());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn enum_named_fields_variant() {
|
||||
let mut key = CacheKeyHasher::new();
|
||||
|
||||
let variant = Enum::NamedFields {
|
||||
a: "Hello".to_string(),
|
||||
b: "World".to_string(),
|
||||
};
|
||||
variant.cache_key(&mut key);
|
||||
|
||||
let mut hash = DefaultHasher::new();
|
||||
variant.hash(&mut hash);
|
||||
|
||||
assert_eq!(hash.finish(), key.finish());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn enum_unnamed_fields_variant() {
|
||||
let mut key = CacheKeyHasher::new();
|
||||
|
||||
let variant = Enum::UnnamedFields("Hello".to_string(), "World".to_string());
|
||||
variant.cache_key(&mut key);
|
||||
|
||||
let mut hash = DefaultHasher::new();
|
||||
variant.hash(&mut hash);
|
||||
|
||||
assert_eq!(hash.finish(), key.finish());
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue