mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-03 18:38:21 +00:00
Use hasher to compute resolution hash (#5495)
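## Summary

Addresses one TODO in `CachedEnvironment`: the resolution hash is now computed by feeding the resolved distributions to a hasher (`hash_digest`) rather than digesting a newline-joined string of their display forms. This should be more efficient.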
This commit is contained in:
parent
e8d7c0cb58
commit
561625ed8c
18 changed files with 78 additions and 69 deletions
|
@ -1,22 +1,34 @@
|
|||
use std::hash::Hasher;
|
||||
|
||||
use crate::cache_key::{CacheKey, CacheKeyHasher};
|
||||
use seahash::SeaHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
/// Compute a hex string hash of a `CacheKey` object.
|
||||
///
|
||||
/// The value returned by [`digest`] should be stable across releases and platforms.
|
||||
pub fn digest<H: CacheKey>(hashable: &H) -> String {
|
||||
to_hex(cache_key_u64(hashable))
|
||||
}
|
||||
|
||||
/// Convert a u64 to a hex string.
|
||||
fn to_hex(num: u64) -> String {
|
||||
hex::encode(num.to_le_bytes())
|
||||
}
|
||||
|
||||
/// The value returned by [`cache_digest`] should be stable across releases and platforms.
|
||||
pub fn cache_digest<H: CacheKey>(hashable: &H) -> String {
|
||||
/// Compute a u64 hash of a [`CacheKey`] object.
|
||||
fn cache_key_u64<H: CacheKey>(hashable: &H) -> u64 {
|
||||
let mut hasher = CacheKeyHasher::new();
|
||||
hashable.cache_key(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
to_hex(cache_key_u64(hashable))
|
||||
}
|
||||
|
||||
/// Compute a hex string hash of a hashable object.
|
||||
pub fn hash_digest<H: Hash>(hashable: &H) -> String {
|
||||
/// Compute a u64 hash of a hashable object.
|
||||
fn hash_u64<H: Hash>(hashable: &H) -> u64 {
|
||||
let mut hasher = SeaHasher::new();
|
||||
hashable.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
to_hex(hash_u64(hashable))
|
||||
}
|
||||
|
||||
/// Convert a u64 to a hex string.
|
||||
fn to_hex(num: u64) -> String {
|
||||
hex::encode(num.to_le_bytes())
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
pub use cache_key::{CacheKey, CacheKeyHasher};
|
||||
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
|
||||
pub use digest::digest;
|
||||
pub use digest::{cache_digest, hash_digest};
|
||||
|
||||
mod cache_key;
|
||||
mod canonical_url;
|
||||
|
|
|
@ -6,6 +6,7 @@ use crate::{InstalledMetadata, InstalledVersion, Name};
|
|||
|
||||
/// A distribution which is either installable, is a wheel in our cache or is already installed.
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum LocalDist {
|
||||
Cached(CachedDist),
|
||||
Installed(InstalledDist),
|
||||
|
|
|
@ -19,7 +19,7 @@ pub enum FileConversionError {
|
|||
|
||||
/// Internal analog to [`pypi_types::File`].
|
||||
#[derive(
|
||||
Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
|
||||
Debug, Clone, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
|
||||
)]
|
||||
#[archive(check_bytes)]
|
||||
#[archive_attr(derive(Debug))]
|
||||
|
@ -67,7 +67,7 @@ impl File {
|
|||
|
||||
/// While a registry file is generally a remote URL, it can also be a local file if it comes from a directory-based flat index.
|
||||
#[derive(
|
||||
Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
|
||||
Debug, Clone, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
|
||||
)]
|
||||
#[archive(check_bytes)]
|
||||
#[archive_attr(derive(Debug))]
|
||||
|
@ -147,13 +147,14 @@ impl Display for FileLocation {
|
|||
#[derive(
|
||||
Debug,
|
||||
Clone,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Hash,
|
||||
Serialize,
|
||||
Deserialize,
|
||||
rkyv::Archive,
|
||||
rkyv::Deserialize,
|
||||
rkyv::Serialize,
|
||||
PartialEq,
|
||||
Eq,
|
||||
)]
|
||||
#[archive(check_bytes)]
|
||||
#[archive_attr(derive(Debug))]
|
||||
|
@ -185,7 +186,7 @@ impl From<UrlString> for String {
|
|||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for UrlString {
|
||||
impl Display for UrlString {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
fmt::Display::fmt(&self.0, f)
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ use uv_normalize::PackageName;
|
|||
use crate::{DistributionMetadata, InstalledMetadata, InstalledVersion, Name, VersionOrUrlRef};
|
||||
|
||||
/// A built distribution (wheel) that is installed in a virtual environment.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub enum InstalledDist {
|
||||
/// The distribution was derived from a registry, like `PyPI`.
|
||||
Registry(InstalledRegistryDist),
|
||||
|
@ -30,14 +30,14 @@ pub enum InstalledDist {
|
|||
LegacyEditable(InstalledLegacyEditable),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct InstalledRegistryDist {
|
||||
pub name: PackageName,
|
||||
pub version: Version,
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct InstalledDirectUrlDist {
|
||||
pub name: PackageName,
|
||||
pub version: Version,
|
||||
|
@ -47,21 +47,21 @@ pub struct InstalledDirectUrlDist {
|
|||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct InstalledEggInfoFile {
|
||||
pub name: PackageName,
|
||||
pub version: Version,
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct InstalledEggInfoDirectory {
|
||||
pub name: PackageName,
|
||||
pub version: Version,
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct InstalledLegacyEditable {
|
||||
pub name: PackageName,
|
||||
pub version: Version,
|
||||
|
|
|
@ -126,14 +126,14 @@ impl std::fmt::Display for InstalledVersion<'_> {
|
|||
/// Either a built distribution, a wheel, or a source distribution that exists at some location.
|
||||
///
|
||||
/// The location can be an index, URL or path (wheel), or index, URL, path or Git repository (source distribution).
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub enum Dist {
|
||||
Built(BuiltDist),
|
||||
Source(SourceDist),
|
||||
}
|
||||
|
||||
/// A wheel, with its three possible origins (index, url, path)
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum BuiltDist {
|
||||
Registry(RegistryBuiltDist),
|
||||
|
@ -142,7 +142,7 @@ pub enum BuiltDist {
|
|||
}
|
||||
|
||||
/// A source distribution, with its possible origins (index, url, path, git)
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum SourceDist {
|
||||
Registry(RegistrySourceDist),
|
||||
|
@ -153,7 +153,7 @@ pub enum SourceDist {
|
|||
}
|
||||
|
||||
/// A built distribution (wheel) that exists in a registry, like `PyPI`.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct RegistryBuiltWheel {
|
||||
pub filename: WheelFilename,
|
||||
pub file: Box<File>,
|
||||
|
@ -161,7 +161,7 @@ pub struct RegistryBuiltWheel {
|
|||
}
|
||||
|
||||
/// A built distribution (wheel) that exists in a registry, like `PyPI`.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct RegistryBuiltDist {
|
||||
/// All wheels associated with this distribution. It is guaranteed
|
||||
/// that there is at least one wheel.
|
||||
|
@ -191,7 +191,7 @@ pub struct RegistryBuiltDist {
|
|||
}
|
||||
|
||||
/// A built distribution (wheel) that exists at an arbitrary URL.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct DirectUrlBuiltDist {
|
||||
/// We require that wheel urls end in the full wheel filename, e.g.
|
||||
/// `https://example.org/packages/flask-3.0.0-py3-none-any.whl`
|
||||
|
@ -203,7 +203,7 @@ pub struct DirectUrlBuiltDist {
|
|||
}
|
||||
|
||||
/// A built distribution (wheel) that exists in a local directory.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct PathBuiltDist {
|
||||
pub filename: WheelFilename,
|
||||
/// The path to the wheel.
|
||||
|
@ -213,7 +213,7 @@ pub struct PathBuiltDist {
|
|||
}
|
||||
|
||||
/// A source distribution that exists in a registry, like `PyPI`.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct RegistrySourceDist {
|
||||
pub name: PackageName,
|
||||
pub version: Version,
|
||||
|
@ -230,7 +230,7 @@ pub struct RegistrySourceDist {
|
|||
}
|
||||
|
||||
/// A source distribution that exists at an arbitrary URL.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct DirectUrlSourceDist {
|
||||
/// Unlike [`DirectUrlBuiltDist`], we can't require a full filename with a version here, people
|
||||
/// like using e.g. `foo @ https://github.com/org/repo/archive/master.zip`
|
||||
|
@ -244,7 +244,7 @@ pub struct DirectUrlSourceDist {
|
|||
}
|
||||
|
||||
/// A source distribution that exists in a Git repository.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct GitSourceDist {
|
||||
pub name: PackageName,
|
||||
/// The URL without the revision and subdirectory fragment.
|
||||
|
@ -256,7 +256,7 @@ pub struct GitSourceDist {
|
|||
}
|
||||
|
||||
/// A source distribution that exists in a local archive (e.g., a `.tar.gz` file).
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct PathSourceDist {
|
||||
pub name: PackageName,
|
||||
/// The resolved, absolute path to the distribution which we use for installing.
|
||||
|
@ -270,7 +270,7 @@ pub struct PathSourceDist {
|
|||
}
|
||||
|
||||
/// A source distribution that exists in a local directory.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct DirectorySourceDist {
|
||||
pub name: PackageName,
|
||||
/// The resolved, absolute path to the distribution which we use for installing.
|
||||
|
|
|
@ -71,7 +71,7 @@ impl Resolution {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub enum ResolutionDiagnostic {
|
||||
MissingExtra {
|
||||
/// The distribution that was requested with a non-existent extra. For example,
|
||||
|
|
|
@ -12,7 +12,8 @@ use crate::{
|
|||
/// A distribution that can be used for resolution and installation.
|
||||
///
|
||||
/// Either an already-installed distribution or a distribution that can be installed.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum ResolvedDist {
|
||||
Installed(InstalledDist),
|
||||
Installable(Dist),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -7,7 +7,7 @@ use url::Url;
|
|||
/// Metadata for a distribution that was installed via a direct URL.
|
||||
///
|
||||
/// See: <https://packaging.python.org/en/latest/specifications/direct-url-data-structure/>
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", untagged)]
|
||||
pub enum DirectUrl {
|
||||
/// The direct URL is a local directory. For example:
|
||||
|
@ -41,23 +41,23 @@ pub enum DirectUrl {
|
|||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct DirInfo {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub editable: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct ArchiveInfo {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub hash: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub hashes: Option<HashMap<String, String>>,
|
||||
pub hashes: Option<BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct VcsInfo {
|
||||
pub vcs: VcsKind,
|
||||
|
@ -67,7 +67,7 @@ pub struct VcsInfo {
|
|||
pub requested_revision: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum VcsKind {
|
||||
Git,
|
||||
|
|
|
@ -92,6 +92,7 @@ impl CoreMetadata {
|
|||
Clone,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Hash,
|
||||
Serialize,
|
||||
Deserialize,
|
||||
rkyv::Archive,
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::path::{Path, PathBuf};
|
|||
|
||||
use url::Url;
|
||||
|
||||
use cache_key::{digest, CanonicalUrl};
|
||||
use cache_key::{cache_digest, CanonicalUrl};
|
||||
use distribution_types::IndexUrl;
|
||||
|
||||
/// Cache wheels and their metadata, both from remote wheels and built from source distributions.
|
||||
|
@ -30,19 +30,19 @@ impl<'a> WheelCache<'a> {
|
|||
WheelCache::Index(IndexUrl::Pypi(_)) => WheelCacheKind::Pypi.root(),
|
||||
WheelCache::Index(url) => WheelCacheKind::Index
|
||||
.root()
|
||||
.join(digest(&CanonicalUrl::new(url))),
|
||||
.join(cache_digest(&CanonicalUrl::new(url))),
|
||||
WheelCache::Url(url) => WheelCacheKind::Url
|
||||
.root()
|
||||
.join(digest(&CanonicalUrl::new(url))),
|
||||
.join(cache_digest(&CanonicalUrl::new(url))),
|
||||
WheelCache::Path(url) => WheelCacheKind::Path
|
||||
.root()
|
||||
.join(digest(&CanonicalUrl::new(url))),
|
||||
.join(cache_digest(&CanonicalUrl::new(url))),
|
||||
WheelCache::Editable(url) => WheelCacheKind::Editable
|
||||
.root()
|
||||
.join(digest(&CanonicalUrl::new(url))),
|
||||
.join(cache_digest(&CanonicalUrl::new(url))),
|
||||
WheelCache::Git(url, sha) => WheelCacheKind::Git
|
||||
.root()
|
||||
.join(digest(&CanonicalUrl::new(url)))
|
||||
.join(cache_digest(&CanonicalUrl::new(url)))
|
||||
.join(sha),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,7 +141,7 @@ impl<'a> FlatIndexClient<'a> {
|
|||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::FlatIndex,
|
||||
"html",
|
||||
format!("{}.msgpack", cache_key::digest(&url.to_string())),
|
||||
format!("{}.msgpack", cache_key::cache_digest(&url.to_string())),
|
||||
);
|
||||
let cache_control = match self.client.connectivity() {
|
||||
Connectivity::Online => CacheControl::from(
|
||||
|
|
|
@ -63,7 +63,7 @@ impl GitResolver {
|
|||
fs::create_dir_all(&lock_dir).await?;
|
||||
let repository_url = RepositoryUrl::new(url.repository());
|
||||
let _lock = LockedFile::acquire(
|
||||
lock_dir.join(cache_key::digest(&repository_url)),
|
||||
lock_dir.join(cache_key::cache_digest(&repository_url)),
|
||||
&repository_url,
|
||||
)?;
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ use reqwest_middleware::ClientWithMiddleware;
|
|||
use tracing::{debug, instrument};
|
||||
use url::Url;
|
||||
|
||||
use cache_key::{digest, RepositoryUrl};
|
||||
use cache_key::{cache_digest, RepositoryUrl};
|
||||
|
||||
use crate::git::GitRemote;
|
||||
use crate::{GitOid, GitSha, GitUrl};
|
||||
|
@ -53,7 +53,7 @@ impl GitSource {
|
|||
#[instrument(skip(self), fields(repository = %self.git.repository, rev = ?self.git.precise))]
|
||||
pub fn fetch(self) -> Result<Fetch> {
|
||||
// The path to the repo, within the Git database.
|
||||
let ident = digest(&RepositoryUrl::new(&self.git.repository));
|
||||
let ident = cache_digest(&RepositoryUrl::new(&self.git.repository));
|
||||
let db_path = self.cache.join("db").join(&ident);
|
||||
|
||||
let remote = GitRemote::new(&self.git.repository);
|
||||
|
|
|
@ -17,9 +17,9 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
|
|||
Clone,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Hash,
|
||||
PartialOrd,
|
||||
Ord,
|
||||
Hash,
|
||||
Serialize,
|
||||
rkyv::Archive,
|
||||
rkyv::Deserialize,
|
||||
|
|
|
@ -197,7 +197,7 @@ impl PythonEnvironment {
|
|||
} else {
|
||||
// Otherwise, use a global lockfile.
|
||||
LockedFile::acquire(
|
||||
env::temp_dir().join(format!("uv-{}.lock", cache_key::digest(&self.0.root))),
|
||||
env::temp_dir().join(format!("uv-{}.lock", cache_key::cache_digest(&self.0.root))),
|
||||
self.0.root.user_display(),
|
||||
)
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize};
|
|||
use thiserror::Error;
|
||||
use tracing::{trace, warn};
|
||||
|
||||
use cache_key::digest;
|
||||
use cache_key::cache_digest;
|
||||
use install_wheel_rs::Layout;
|
||||
use pep440_rs::Version;
|
||||
use pep508_rs::{MarkerEnvironment, StringVersion};
|
||||
|
@ -716,7 +716,7 @@ impl InterpreterInfo {
|
|||
"",
|
||||
// We use the absolute path for the cache entry to avoid cache collisions for relative
|
||||
// paths. But we don't want to query the executable with symbolic links resolved.
|
||||
format!("{}.msgpack", digest(&absolute)),
|
||||
format!("{}.msgpack", cache_digest(&absolute)),
|
||||
);
|
||||
|
||||
// We check the timestamp of the canonicalized executable to check if an underlying
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
use itertools::Itertools;
|
||||
use tracing::debug;
|
||||
|
||||
use cache_key::digest;
|
||||
use cache_key::{cache_digest, hash_digest};
|
||||
use distribution_types::Resolution;
|
||||
use uv_cache::{Cache, CacheBucket};
|
||||
use uv_client::Connectivity;
|
||||
|
@ -75,18 +74,12 @@ impl CachedEnvironment {
|
|||
// Hash the resolution by hashing the generated lockfile.
|
||||
// TODO(charlie): If the resolution contains any mutable metadata (like a path or URL
|
||||
// dependency), skip this step.
|
||||
// TODO(charlie): Consider implementing `CacheKey` for `Resolution`.
|
||||
let resolution_hash = digest(
|
||||
&resolution
|
||||
.distributions()
|
||||
.map(std::string::ToString::to_string)
|
||||
.join("\n")
|
||||
.as_bytes(),
|
||||
);
|
||||
let distributions = resolution.distributions().collect::<Vec<_>>();
|
||||
let resolution_hash = hash_digest(&distributions);
|
||||
|
||||
// Hash the interpreter based on its path.
|
||||
// TODO(charlie): Come up with a robust hash for the interpreter.
|
||||
let interpreter_hash = digest(&interpreter.sys_executable());
|
||||
let interpreter_hash = cache_digest(&interpreter.sys_executable());
|
||||
|
||||
// Search in the content-addressed cache.
|
||||
let cache_entry = cache.entry(CacheBucket::Environments, interpreter_hash, resolution_hash);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue