Remove unnecessary hashing from IDs (#2998)

## Summary

In all of these ID types, we pass values to `cache_key::digest` prior to
passing to `DistributionId` or `ResourceId`. The `DistributionId` and
`ResourceId` are then hashed later, since they're used as keys in hash
maps.

It seems wasteful to hash the value, then hash the hashed value? So this
PR modifies those structs to be enums that can take one of a fixed set
of types.
This commit is contained in:
Charlie Marsh 2024-04-11 17:23:37 -04:00 committed by GitHub
parent a71bd60238
commit 8507ba872f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 33 additions and 73 deletions

View file

@ -1,9 +1,11 @@
use std::fmt::{Display, Formatter};
use std::path::PathBuf;
use cache_key::CanonicalUrl;
use cache_key::{CanonicalUrl, RepositoryUrl};
use url::Url;
use pep440_rs::Version;
use pypi_types::HashDigest;
use uv_normalize::PackageName;
/// A unique identifier for a package. A package can either be identified by a name (e.g., `black`)
@ -13,7 +15,7 @@ pub enum PackageId {
/// The identifier consists of a package name.
Name(PackageName),
/// The identifier consists of a URL.
Url(String),
Url(CanonicalUrl),
}
impl PackageId {
@ -24,7 +26,7 @@ impl PackageId {
/// Create a new [`PackageId`] from a URL.
pub fn from_url(url: &Url) -> Self {
Self::Url(cache_key::digest(&CanonicalUrl::new(url)))
Self::Url(CanonicalUrl::new(url))
}
}
@ -43,7 +45,7 @@ pub enum VersionId {
/// The identifier consists of a package name and version.
NameVersion(PackageName, Version),
/// The identifier consists of a URL.
Url(String),
Url(CanonicalUrl),
}
impl VersionId {
@ -54,7 +56,7 @@ impl VersionId {
/// Create a new [`VersionId`] from a URL.
pub fn from_url(url: &Url) -> Self {
Self::Url(cache_key::digest(&CanonicalUrl::new(url)))
Self::Url(CanonicalUrl::new(url))
}
}
@ -81,28 +83,22 @@ impl Display for VersionId {
/// that the ID is unique within a single invocation of the resolver (and so, e.g., a hash of
/// the URL would also be sufficient).
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct DistributionId(String);
impl DistributionId {
pub fn new(id: impl Into<String>) -> Self {
Self(id.into())
}
}
impl DistributionId {
pub fn as_str(&self) -> &str {
&self.0
}
pub enum DistributionId {
Url(CanonicalUrl),
PathBuf(PathBuf),
Digest(HashDigest),
AbsoluteUrl(String),
RelativeUrl(String, String),
}
/// A unique identifier for a resource, like a URL or a Git repository.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ResourceId(String);
impl ResourceId {
pub fn new(id: impl Into<String>) -> Self {
Self(id.into())
}
pub enum ResourceId {
Url(RepositoryUrl),
PathBuf(PathBuf),
Digest(HashDigest),
AbsoluteUrl(String),
RelativeUrl(String, String),
}
impl From<&Self> for VersionId {

View file

@ -791,18 +791,18 @@ impl RemoteSource for Dist {
impl Identifier for Url {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(cache_key::digest(&cache_key::CanonicalUrl::new(self)))
DistributionId::Url(cache_key::CanonicalUrl::new(self))
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(cache_key::digest(&cache_key::RepositoryUrl::new(self)))
ResourceId::Url(cache_key::RepositoryUrl::new(self))
}
}
impl Identifier for File {
fn distribution_id(&self) -> DistributionId {
if let Some(hash) = self.hashes.first() {
DistributionId::new(&*hash.digest)
DistributionId::Digest(hash.clone())
} else {
self.url.distribution_id()
}
@ -810,7 +810,7 @@ impl Identifier for File {
fn resource_id(&self) -> ResourceId {
if let Some(hash) = self.hashes.first() {
ResourceId::new(&*hash.digest)
ResourceId::Digest(hash.clone())
} else {
self.url.resource_id()
}
@ -819,67 +819,31 @@ impl Identifier for File {
impl Identifier for Path {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(cache_key::digest(&self))
DistributionId::PathBuf(self.to_path_buf())
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(cache_key::digest(&self))
}
}
impl Identifier for String {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(cache_key::digest(&self))
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(cache_key::digest(&self))
}
}
impl Identifier for &str {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(cache_key::digest(&self))
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(cache_key::digest(&self))
}
}
impl Identifier for (&str, &str) {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(cache_key::digest(&self))
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(cache_key::digest(&self))
}
}
impl Identifier for (&Url, &str) {
fn distribution_id(&self) -> DistributionId {
DistributionId::new(cache_key::digest(&self))
}
fn resource_id(&self) -> ResourceId {
ResourceId::new(cache_key::digest(&self))
ResourceId::PathBuf(self.to_path_buf())
}
}
impl Identifier for FileLocation {
fn distribution_id(&self) -> DistributionId {
match self {
Self::RelativeUrl(base, url) => (base.as_str(), url.as_str()).distribution_id(),
Self::AbsoluteUrl(url) => url.distribution_id(),
Self::RelativeUrl(base, url) => {
DistributionId::RelativeUrl(base.to_string(), url.to_string())
}
Self::AbsoluteUrl(url) => DistributionId::AbsoluteUrl(url.to_string()),
Self::Path(path) => path.distribution_id(),
}
}
fn resource_id(&self) -> ResourceId {
match self {
Self::RelativeUrl(base, url) => (base.as_str(), url.as_str()).resource_id(),
Self::AbsoluteUrl(url) => url.resource_id(),
Self::RelativeUrl(base, url) => {
ResourceId::RelativeUrl(base.to_string(), url.to_string())
}
Self::AbsoluteUrl(url) => ResourceId::AbsoluteUrl(url.to_string()),
Self::Path(path) => path.resource_id(),
}
}