diff --git a/Cargo.lock b/Cargo.lock index ddf5052cf..becd72598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -848,6 +848,7 @@ dependencies = [ "anyhow", "cache-key", "chrono", + "data-encoding", "distribution-filename", "fs-err", "once_cell", @@ -860,6 +861,7 @@ dependencies = [ "rustc-hash", "serde", "serde_json", + "sha2", "thiserror", "url", ] diff --git a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml index c88e51a99..023fc6406 100644 --- a/crates/distribution-types/Cargo.toml +++ b/crates/distribution-types/Cargo.toml @@ -24,10 +24,12 @@ requirements-txt = { path = "../requirements-txt" } anyhow = { workspace = true } chrono = { workspace = true, features = ["serde"] } +data-encoding = { workspace = true } fs-err = { workspace = true } once_cell = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } +sha2 = { workspace = true } thiserror = { workspace = true } url = { workspace = true } diff --git a/crates/distribution-types/src/id.rs b/crates/distribution-types/src/id.rs index 27e2cc9e3..39701dfd1 100644 --- a/crates/distribution-types/src/id.rs +++ b/crates/distribution-types/src/id.rs @@ -18,6 +18,12 @@ impl DistributionId { } } +impl DistributionId { + pub fn as_str(&self) -> &str { + &self.0 + } +} + /// A unique identifier for a resource, like a URL or a Git repository. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ResourceId(String); diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 747909c6a..da417daeb 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -652,11 +652,19 @@ impl Identifier for Url { impl Identifier for File { fn distribution_id(&self) -> DistributionId { - DistributionId::new(self.hashes.sha256.clone()) + if let Some(hash) = &self.hashes.sha256 { + DistributionId::new(hash) + } else { + self.url.distribution_id() + } } fn resource_id(&self) -> ResourceId { - ResourceId::new(self.hashes.sha256.clone()) + if let Some(hash) = &self.hashes.sha256 { + ResourceId::new(hash) + } else { + self.url.resource_id() + } } } diff --git a/crates/puffin-client/src/html.rs b/crates/puffin-client/src/html.rs index 3939ebac1..531ae6f3e 100644 --- a/crates/puffin-client/src/html.rs +++ b/crates/puffin-client/src/html.rs @@ -82,7 +82,9 @@ impl SimpleHtml { let sha256 = std::str::from_utf8(value.as_bytes())?; let sha256 = sha256.to_string(); - Ok(Hashes { sha256 }) + Ok(Hashes { + sha256: Some(sha256), + }) } /// Parse a [`File`] from an `` tag. @@ -96,10 +98,12 @@ impl SimpleHtml { .ok_or(Error::MissingHref)?; let href = std::str::from_utf8(href.as_bytes())?; - // Split the base and the fragment. - let (path, fragment) = href - .split_once('#') - .ok_or_else(|| Error::MissingHash(href.to_string()))?; + let (path, hashes) = if let Some((path, fragment)) = href.split_once('#') { + // Extract the hash, which should be in the fragment. + (path, Self::parse_hash(fragment)?) + } else { + (href, Hashes::default()) + }; // Extract the filename from the body text, which MUST match that of // the final path component of the URL. @@ -108,9 +112,6 @@ impl SimpleHtml { .last() .ok_or_else(|| Error::MissingFilename(href.to_string()))?; - // Extract the hash, which should be in the fragment. - let hashes = Self::parse_hash(fragment)?; - // Extract the `requires-python` field, which should be set on the // `data-requires-python` attribute. let requires_python = if let Some(requires_python) = @@ -234,7 +235,9 @@ mod tests { dist_info_metadata: None, filename: "Jinja2-3.1.2-py3-none-any.whl", hashes: Hashes { - sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", + sha256: Some( + "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", + ), }, requires_python: None, size: None, @@ -288,7 +291,9 @@ mod tests { dist_info_metadata: None, filename: "Jinja2-3.1.2-py3-none-any.whl", hashes: Hashes { - sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", + sha256: Some( + "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61", + ), }, requires_python: None, size: None, @@ -301,6 +306,57 @@ mod tests { "###); } + #[test] + fn parse_missing_hash() { + let text = r#" + + + +

Links for jinja2

+
Jinja2-3.1.2-py3-none-any.whl
+ + + + "#; + let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap(); + let result = SimpleHtml::parse(text, &base).unwrap(); + insta::assert_debug_snapshot!(result, @r###" + SimpleHtml { + base: BaseUrl( + Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "download.pytorch.org", + ), + ), + port: None, + path: "/whl/jinja2/", + query: None, + fragment: None, + }, + ), + files: [ + File { + dist_info_metadata: None, + filename: "Jinja2-3.1.2-py3-none-any.whl", + hashes: Hashes { + sha256: None, + }, + requires_python: None, + size: None, + upload_time: None, + url: "/whl/Jinja2-3.1.2-py3-none-any.whl", + yanked: None, + }, + ], + } + "###); + } + #[test] fn parse_missing_href() { let text = r#" @@ -335,23 +391,6 @@ mod tests { insta::assert_display_snapshot!(result, @"Missing href attribute on anchor link"); } - #[test] - fn parse_missing_hash() { - let text = r#" - - - -

Links for jinja2

- Jinja2-3.1.2-py3-none-any.whl
- - - - "#; - let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap(); - let result = SimpleHtml::parse(text, &base).unwrap_err(); - insta::assert_display_snapshot!(result, @"Missing hash attribute on URL: /whl/Jinja2-3.1.2-py3-none-any.whl"); - } - #[test] fn parse_missing_hash_value() { let text = r#" diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs index eed88d340..ee9a2bff1 100644 --- a/crates/puffin-distribution/src/source/mod.rs +++ b/crates/puffin-distribution/src/source/mod.rs @@ -16,8 +16,8 @@ use zip::ZipArchive; use distribution_filename::WheelFilename; use distribution_types::{ - DirectArchiveUrl, DirectGitUrl, Dist, GitSourceDist, LocalEditable, Name, PathSourceDist, - RemoteSource, SourceDist, + DirectArchiveUrl, DirectGitUrl, Dist, GitSourceDist, Identifier, LocalEditable, Name, + PathSourceDist, RemoteSource, SourceDist, }; use install_wheel_rs::read_dist_info; use platform_tags::Tags; @@ -104,7 +104,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { CacheBucket::BuiltWheels, WheelCache::Index(®istry_source_dist.index) .remote_wheel_dir(registry_source_dist.name.as_ref()) - .join(®istry_source_dist.file.hashes.sha256[..16]), + .join(®istry_source_dist.file.distribution_id().as_str()[..16]), ); self.url( @@ -160,7 +160,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { CacheBucket::BuiltWheels, WheelCache::Index(®istry_source_dist.index) .remote_wheel_dir(registry_source_dist.name.as_ref()) - .join(®istry_source_dist.file.hashes.sha256[..16]), + .join(®istry_source_dist.file.distribution_id().as_str()[..16]), ); self.url_metadata( diff --git a/crates/puffin-resolver/src/resolution.rs b/crates/puffin-resolver/src/resolution.rs index 813f9ba2d..ee0ebb0cf 100644 --- a/crates/puffin-resolver/src/resolution.rs +++ b/crates/puffin-resolver/src/resolution.rs @@ -270,8 +270,10 @@ impl std::fmt::Display for DisplayResolutionGraph<'_> { .filter(|hashes| !hashes.is_empty()) { for hash in hashes { - writeln!(f, " \\")?; - write!(f, " --hash={hash}")?; + if let Some(hash) = hash.to_string() { + writeln!(f, " \\")?; + write!(f, " --hash={hash}")?; + } } } } diff --git a/crates/pypi-types/src/simple_json.rs b/crates/pypi-types/src/simple_json.rs index d7c989d76..5f6c99c3e 100644 --- a/crates/pypi-types/src/simple_json.rs +++ b/crates/pypi-types/src/simple_json.rs @@ -85,14 +85,18 @@ impl Yanked { /// /// PEP 691 says multiple hashes can be included and the interpretation is left to the client, we /// only support SHA 256 atm. -#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)] +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default, Serialize, Deserialize)] pub struct Hashes { - // TODO(charlie): Hashes should be optional. - pub sha256: String, + pub sha256: Option, } -impl std::fmt::Display for Hashes { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "sha256:{}", self.sha256) +impl Hashes { + /// Format as `:`. + /// + /// Currently limited to SHA256. + pub fn to_string(&self) -> Option { + self.sha256 + .as_ref() + .map(|sha256| format!("sha256:{sha256}")) } }