diff --git a/Cargo.lock b/Cargo.lock
index ddf5052cf..becd72598 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -848,6 +848,7 @@ dependencies = [
"anyhow",
"cache-key",
"chrono",
+ "data-encoding",
"distribution-filename",
"fs-err",
"once_cell",
@@ -860,6 +861,7 @@ dependencies = [
"rustc-hash",
"serde",
"serde_json",
+ "sha2",
"thiserror",
"url",
]
diff --git a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml
index c88e51a99..023fc6406 100644
--- a/crates/distribution-types/Cargo.toml
+++ b/crates/distribution-types/Cargo.toml
@@ -24,10 +24,12 @@ requirements-txt = { path = "../requirements-txt" }
anyhow = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
+data-encoding = { workspace = true }
fs-err = { workspace = true }
once_cell = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
+sha2 = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true }
diff --git a/crates/distribution-types/src/id.rs b/crates/distribution-types/src/id.rs
index 27e2cc9e3..39701dfd1 100644
--- a/crates/distribution-types/src/id.rs
+++ b/crates/distribution-types/src/id.rs
@@ -18,6 +18,12 @@ impl DistributionId {
}
}
+impl DistributionId {
+ pub fn as_str(&self) -> &str {
+ &self.0
+ }
+}
+
/// A unique identifier for a resource, like a URL or a Git repository.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ResourceId(String);
diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs
index 747909c6a..da417daeb 100644
--- a/crates/distribution-types/src/lib.rs
+++ b/crates/distribution-types/src/lib.rs
@@ -652,11 +652,19 @@ impl Identifier for Url {
impl Identifier for File {
fn distribution_id(&self) -> DistributionId {
- DistributionId::new(self.hashes.sha256.clone())
+ if let Some(hash) = &self.hashes.sha256 {
+ DistributionId::new(hash)
+ } else {
+ self.url.distribution_id()
+ }
}
fn resource_id(&self) -> ResourceId {
- ResourceId::new(self.hashes.sha256.clone())
+ if let Some(hash) = &self.hashes.sha256 {
+ ResourceId::new(hash)
+ } else {
+ self.url.resource_id()
+ }
}
}
diff --git a/crates/puffin-client/src/html.rs b/crates/puffin-client/src/html.rs
index 3939ebac1..531ae6f3e 100644
--- a/crates/puffin-client/src/html.rs
+++ b/crates/puffin-client/src/html.rs
@@ -82,7 +82,9 @@ impl SimpleHtml {
let sha256 = std::str::from_utf8(value.as_bytes())?;
let sha256 = sha256.to_string();
- Ok(Hashes { sha256 })
+ Ok(Hashes {
+ sha256: Some(sha256),
+ })
}
/// Parse a [`File`] from an `` tag.
@@ -96,10 +98,12 @@ impl SimpleHtml {
.ok_or(Error::MissingHref)?;
let href = std::str::from_utf8(href.as_bytes())?;
- // Split the base and the fragment.
- let (path, fragment) = href
- .split_once('#')
- .ok_or_else(|| Error::MissingHash(href.to_string()))?;
+ let (path, hashes) = if let Some((path, fragment)) = href.split_once('#') {
+ // Extract the hash, which should be in the fragment.
+ (path, Self::parse_hash(fragment)?)
+ } else {
+ (href, Hashes::default())
+ };
// Extract the filename from the body text, which MUST match that of
// the final path component of the URL.
@@ -108,9 +112,6 @@ impl SimpleHtml {
.last()
.ok_or_else(|| Error::MissingFilename(href.to_string()))?;
- // Extract the hash, which should be in the fragment.
- let hashes = Self::parse_hash(fragment)?;
-
// Extract the `requires-python` field, which should be set on the
// `data-requires-python` attribute.
let requires_python = if let Some(requires_python) =
@@ -234,7 +235,9 @@ mod tests {
dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
- sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
+ sha256: Some(
+ "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
+ ),
},
requires_python: None,
size: None,
@@ -288,7 +291,9 @@ mod tests {
dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
- sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
+ sha256: Some(
+ "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
+ ),
},
requires_python: None,
size: None,
@@ -301,6 +306,57 @@ mod tests {
"###);
}
+ #[test]
+ fn parse_missing_hash() {
+ let text = r#"
+
+
+
+ Links for jinja2
+ Jinja2-3.1.2-py3-none-any.whl
+
+
+
+ "#;
+ let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
+ let result = SimpleHtml::parse(text, &base).unwrap();
+ insta::assert_debug_snapshot!(result, @r###"
+ SimpleHtml {
+ base: BaseUrl(
+ Url {
+ scheme: "https",
+ cannot_be_a_base: false,
+ username: "",
+ password: None,
+ host: Some(
+ Domain(
+ "download.pytorch.org",
+ ),
+ ),
+ port: None,
+ path: "/whl/jinja2/",
+ query: None,
+ fragment: None,
+ },
+ ),
+ files: [
+ File {
+ dist_info_metadata: None,
+ filename: "Jinja2-3.1.2-py3-none-any.whl",
+ hashes: Hashes {
+ sha256: None,
+ },
+ requires_python: None,
+ size: None,
+ upload_time: None,
+ url: "/whl/Jinja2-3.1.2-py3-none-any.whl",
+ yanked: None,
+ },
+ ],
+ }
+ "###);
+ }
+
#[test]
fn parse_missing_href() {
let text = r#"
@@ -335,23 +391,6 @@ mod tests {
insta::assert_display_snapshot!(result, @"Missing href attribute on anchor link");
}
- #[test]
- fn parse_missing_hash() {
- let text = r#"
-
-
-