Make hashes optional (#910)

There is no guarantee that an index provides hashes at all, or that it
provides the sha256 hashes we specifically support. From [PEP
503](https://peps.python.org/pep-0503/#specification):

> The URL SHOULD include a hash in the form of a URL fragment with the
following syntax: #<hashname>=<hashvalue>, where <hashname> is the
lowercase name of the hash function (such as sha256) and <hashvalue> is
the hex encoded digest.

When a file has no hash, we instead use its URL as the input to generate a hash when caching.
This commit is contained in:
konsti 2024-01-14 22:32:55 +01:00 committed by GitHub
parent 9ad19b7e54
commit 5ffbfadf66
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 104 additions and 41 deletions

View file

@ -24,10 +24,12 @@ requirements-txt = { path = "../requirements-txt" }
anyhow = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
data-encoding = { workspace = true }
fs-err = { workspace = true }
once_cell = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
sha2 = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true }

View file

@ -18,6 +18,12 @@ impl DistributionId {
}
}
impl DistributionId {
/// Returns the identifier as a string slice borrowed from the wrapped value.
pub fn as_str(&self) -> &str {
&self.0
}
}
/// A unique identifier for a resource, like a URL or a Git repository.
///
/// A newtype around a `String`; equality, ordering, and hashing are all
/// derived from the wrapped string.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ResourceId(String);

View file

@ -652,11 +652,19 @@ impl Identifier for Url {
// Identifiers for a `File` prefer its sha256 hash and fall back to the
// identifiers of its URL when no sha256 hash is present.
impl Identifier for File {
/// Returns a `DistributionId` built from the sha256 hash when one is
/// present; otherwise delegates to `self.url.distribution_id()`.
fn distribution_id(&self) -> DistributionId {
// NOTE(review): the next line looks like the pre-change version of this
// body shown by the diff view (it treats `sha256` as always present) —
// confirm it is the removed line rather than live code.
DistributionId::new(self.hashes.sha256.clone())
if let Some(hash) = &self.hashes.sha256 {
DistributionId::new(hash)
} else {
// No sha256 hash available: identify the file by its URL instead.
self.url.distribution_id()
}
}
/// Returns a `ResourceId` built from the sha256 hash when one is present;
/// otherwise delegates to `self.url.resource_id()`.
fn resource_id(&self) -> ResourceId {
// NOTE(review): the next line looks like the pre-change version of this
// body shown by the diff view — confirm it is the removed line rather
// than live code.
ResourceId::new(self.hashes.sha256.clone())
if let Some(hash) = &self.hashes.sha256 {
ResourceId::new(hash)
} else {
// No sha256 hash available: identify the resource by its URL instead.
self.url.resource_id()
}
}
}