mirror of
https://github.com/astral-sh/uv.git
synced 2025-09-26 20:19:08 +00:00
Make hashes optional (#910)
There is no guarantee that indexes provide hashes at all or the sha256 we support specifically. [PEP 503](https://peps.python.org/pep-0503/#specification): > The URL SHOULD include a hash in the form of a URL fragment with the following syntax: #<hashname>=<hashvalue>, where <hashname> is the lowercase name of the hash function (such as sha256) and <hashvalue> is the hex encoded digest. We instead use the url as input to generate a hash when caching.
This commit is contained in:
parent
9ad19b7e54
commit
5ffbfadf66
8 changed files with 104 additions and 41 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -848,6 +848,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"cache-key",
|
||||
"chrono",
|
||||
"data-encoding",
|
||||
"distribution-filename",
|
||||
"fs-err",
|
||||
"once_cell",
|
||||
|
@ -860,6 +861,7 @@ dependencies = [
|
|||
"rustc-hash",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"thiserror",
|
||||
"url",
|
||||
]
|
||||
|
|
|
@ -24,10 +24,12 @@ requirements-txt = { path = "../requirements-txt" }
|
|||
|
||||
anyhow = { workspace = true }
|
||||
chrono = { workspace = true, features = ["serde"] }
|
||||
data-encoding = { workspace = true }
|
||||
fs-err = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
rustc-hash = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
url = { workspace = true }
|
||||
|
|
|
@ -18,6 +18,12 @@ impl DistributionId {
|
|||
}
|
||||
}
|
||||
|
||||
impl DistributionId {
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// A unique identifier for a resource, like a URL or a Git repository.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct ResourceId(String);
|
||||
|
|
|
@ -652,11 +652,19 @@ impl Identifier for Url {
|
|||
|
||||
impl Identifier for File {
|
||||
fn distribution_id(&self) -> DistributionId {
|
||||
DistributionId::new(self.hashes.sha256.clone())
|
||||
if let Some(hash) = &self.hashes.sha256 {
|
||||
DistributionId::new(hash)
|
||||
} else {
|
||||
self.url.distribution_id()
|
||||
}
|
||||
}
|
||||
|
||||
fn resource_id(&self) -> ResourceId {
|
||||
ResourceId::new(self.hashes.sha256.clone())
|
||||
if let Some(hash) = &self.hashes.sha256 {
|
||||
ResourceId::new(hash)
|
||||
} else {
|
||||
self.url.resource_id()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -82,7 +82,9 @@ impl SimpleHtml {
|
|||
|
||||
let sha256 = std::str::from_utf8(value.as_bytes())?;
|
||||
let sha256 = sha256.to_string();
|
||||
Ok(Hashes { sha256 })
|
||||
Ok(Hashes {
|
||||
sha256: Some(sha256),
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse a [`File`] from an `<a>` tag.
|
||||
|
@ -96,10 +98,12 @@ impl SimpleHtml {
|
|||
.ok_or(Error::MissingHref)?;
|
||||
let href = std::str::from_utf8(href.as_bytes())?;
|
||||
|
||||
// Split the base and the fragment.
|
||||
let (path, fragment) = href
|
||||
.split_once('#')
|
||||
.ok_or_else(|| Error::MissingHash(href.to_string()))?;
|
||||
let (path, hashes) = if let Some((path, fragment)) = href.split_once('#') {
|
||||
// Extract the hash, which should be in the fragment.
|
||||
(path, Self::parse_hash(fragment)?)
|
||||
} else {
|
||||
(href, Hashes::default())
|
||||
};
|
||||
|
||||
// Extract the filename from the body text, which MUST match that of
|
||||
// the final path component of the URL.
|
||||
|
@ -108,9 +112,6 @@ impl SimpleHtml {
|
|||
.last()
|
||||
.ok_or_else(|| Error::MissingFilename(href.to_string()))?;
|
||||
|
||||
// Extract the hash, which should be in the fragment.
|
||||
let hashes = Self::parse_hash(fragment)?;
|
||||
|
||||
// Extract the `requires-python` field, which should be set on the
|
||||
// `data-requires-python` attribute.
|
||||
let requires_python = if let Some(requires_python) =
|
||||
|
@ -234,7 +235,9 @@ mod tests {
|
|||
dist_info_metadata: None,
|
||||
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||
hashes: Hashes {
|
||||
sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
||||
sha256: Some(
|
||||
"6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
||||
),
|
||||
},
|
||||
requires_python: None,
|
||||
size: None,
|
||||
|
@ -288,7 +291,9 @@ mod tests {
|
|||
dist_info_metadata: None,
|
||||
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||
hashes: Hashes {
|
||||
sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
||||
sha256: Some(
|
||||
"6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
||||
),
|
||||
},
|
||||
requires_python: None,
|
||||
size: None,
|
||||
|
@ -301,6 +306,57 @@ mod tests {
|
|||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_missing_hash() {
|
||||
let text = r#"
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>Links for jinja2</h1>
|
||||
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
||||
</body>
|
||||
</html>
|
||||
<!--TIMESTAMP 1703347410-->
|
||||
"#;
|
||||
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
||||
let result = SimpleHtml::parse(text, &base).unwrap();
|
||||
insta::assert_debug_snapshot!(result, @r###"
|
||||
SimpleHtml {
|
||||
base: BaseUrl(
|
||||
Url {
|
||||
scheme: "https",
|
||||
cannot_be_a_base: false,
|
||||
username: "",
|
||||
password: None,
|
||||
host: Some(
|
||||
Domain(
|
||||
"download.pytorch.org",
|
||||
),
|
||||
),
|
||||
port: None,
|
||||
path: "/whl/jinja2/",
|
||||
query: None,
|
||||
fragment: None,
|
||||
},
|
||||
),
|
||||
files: [
|
||||
File {
|
||||
dist_info_metadata: None,
|
||||
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||
hashes: Hashes {
|
||||
sha256: None,
|
||||
},
|
||||
requires_python: None,
|
||||
size: None,
|
||||
upload_time: None,
|
||||
url: "/whl/Jinja2-3.1.2-py3-none-any.whl",
|
||||
yanked: None,
|
||||
},
|
||||
],
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_missing_href() {
|
||||
let text = r#"
|
||||
|
@ -335,23 +391,6 @@ mod tests {
|
|||
insta::assert_display_snapshot!(result, @"Missing href attribute on anchor link");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_missing_hash() {
|
||||
let text = r#"
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>Links for jinja2</h1>
|
||||
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
||||
</body>
|
||||
</html>
|
||||
<!--TIMESTAMP 1703347410-->
|
||||
"#;
|
||||
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
||||
let result = SimpleHtml::parse(text, &base).unwrap_err();
|
||||
insta::assert_display_snapshot!(result, @"Missing hash attribute on URL: /whl/Jinja2-3.1.2-py3-none-any.whl");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_missing_hash_value() {
|
||||
let text = r#"
|
||||
|
|
|
@ -16,8 +16,8 @@ use zip::ZipArchive;
|
|||
|
||||
use distribution_filename::WheelFilename;
|
||||
use distribution_types::{
|
||||
DirectArchiveUrl, DirectGitUrl, Dist, GitSourceDist, LocalEditable, Name, PathSourceDist,
|
||||
RemoteSource, SourceDist,
|
||||
DirectArchiveUrl, DirectGitUrl, Dist, GitSourceDist, Identifier, LocalEditable, Name,
|
||||
PathSourceDist, RemoteSource, SourceDist,
|
||||
};
|
||||
use install_wheel_rs::read_dist_info;
|
||||
use platform_tags::Tags;
|
||||
|
@ -104,7 +104,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Index(®istry_source_dist.index)
|
||||
.remote_wheel_dir(registry_source_dist.name.as_ref())
|
||||
.join(®istry_source_dist.file.hashes.sha256[..16]),
|
||||
.join(®istry_source_dist.file.distribution_id().as_str()[..16]),
|
||||
);
|
||||
|
||||
self.url(
|
||||
|
@ -160,7 +160,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
CacheBucket::BuiltWheels,
|
||||
WheelCache::Index(®istry_source_dist.index)
|
||||
.remote_wheel_dir(registry_source_dist.name.as_ref())
|
||||
.join(®istry_source_dist.file.hashes.sha256[..16]),
|
||||
.join(®istry_source_dist.file.distribution_id().as_str()[..16]),
|
||||
);
|
||||
|
||||
self.url_metadata(
|
||||
|
|
|
@ -270,8 +270,10 @@ impl std::fmt::Display for DisplayResolutionGraph<'_> {
|
|||
.filter(|hashes| !hashes.is_empty())
|
||||
{
|
||||
for hash in hashes {
|
||||
writeln!(f, " \\")?;
|
||||
write!(f, " --hash={hash}")?;
|
||||
if let Some(hash) = hash.to_string() {
|
||||
writeln!(f, " \\")?;
|
||||
write!(f, " --hash={hash}")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -85,14 +85,18 @@ impl Yanked {
|
|||
///
|
||||
/// PEP 691 says multiple hashes can be included and the interpretation is left to the client, we
|
||||
/// only support SHA 256 atm.
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default, Serialize, Deserialize)]
|
||||
pub struct Hashes {
|
||||
// TODO(charlie): Hashes should be optional.
|
||||
pub sha256: String,
|
||||
pub sha256: Option<String>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Hashes {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "sha256:{}", self.sha256)
|
||||
impl Hashes {
|
||||
/// Format as `<algorithm>:<hash>`.
|
||||
///
|
||||
/// Currently limited to SHA256.
|
||||
pub fn to_string(&self) -> Option<String> {
|
||||
self.sha256
|
||||
.as_ref()
|
||||
.map(|sha256| format!("sha256:{sha256}"))
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue