mirror of
https://github.com/astral-sh/uv.git
synced 2025-09-26 20:19:08 +00:00
Make hashes optional (#910)
There is no guarantee that indexes provide hashes at all, or that they provide the sha256 hashes we specifically support. [PEP 503](https://peps.python.org/pep-0503/#specification):

> The URL SHOULD include a hash in the form of a URL fragment with the following syntax: `#<hashname>=<hashvalue>`, where `<hashname>` is the lowercase name of the hash function (such as sha256) and `<hashvalue>` is the hex encoded digest.

When no hash is available, we instead use the URL as input to generate a hash when caching.
This commit is contained in:
parent
9ad19b7e54
commit
5ffbfadf66
8 changed files with 104 additions and 41 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -848,6 +848,7 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"cache-key",
|
"cache-key",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
"data-encoding",
|
||||||
"distribution-filename",
|
"distribution-filename",
|
||||||
"fs-err",
|
"fs-err",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
@ -860,6 +861,7 @@ dependencies = [
|
||||||
"rustc-hash",
|
"rustc-hash",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"sha2",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"url",
|
"url",
|
||||||
]
|
]
|
||||||
|
|
|
@ -24,10 +24,12 @@ requirements-txt = { path = "../requirements-txt" }
|
||||||
|
|
||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
chrono = { workspace = true, features = ["serde"] }
|
chrono = { workspace = true, features = ["serde"] }
|
||||||
|
data-encoding = { workspace = true }
|
||||||
fs-err = { workspace = true }
|
fs-err = { workspace = true }
|
||||||
once_cell = { workspace = true }
|
once_cell = { workspace = true }
|
||||||
rustc-hash = { workspace = true }
|
rustc-hash = { workspace = true }
|
||||||
serde = { workspace = true, features = ["derive"] }
|
serde = { workspace = true, features = ["derive"] }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
|
sha2 = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
url = { workspace = true }
|
url = { workspace = true }
|
||||||
|
|
|
@ -18,6 +18,12 @@ impl DistributionId {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl DistributionId {
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A unique identifier for a resource, like a URL or a Git repository.
|
/// A unique identifier for a resource, like a URL or a Git repository.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||||
pub struct ResourceId(String);
|
pub struct ResourceId(String);
|
||||||
|
|
|
@ -652,11 +652,19 @@ impl Identifier for Url {
|
||||||
|
|
||||||
impl Identifier for File {
|
impl Identifier for File {
|
||||||
fn distribution_id(&self) -> DistributionId {
|
fn distribution_id(&self) -> DistributionId {
|
||||||
DistributionId::new(self.hashes.sha256.clone())
|
if let Some(hash) = &self.hashes.sha256 {
|
||||||
|
DistributionId::new(hash)
|
||||||
|
} else {
|
||||||
|
self.url.distribution_id()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resource_id(&self) -> ResourceId {
|
fn resource_id(&self) -> ResourceId {
|
||||||
ResourceId::new(self.hashes.sha256.clone())
|
if let Some(hash) = &self.hashes.sha256 {
|
||||||
|
ResourceId::new(hash)
|
||||||
|
} else {
|
||||||
|
self.url.resource_id()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -82,7 +82,9 @@ impl SimpleHtml {
|
||||||
|
|
||||||
let sha256 = std::str::from_utf8(value.as_bytes())?;
|
let sha256 = std::str::from_utf8(value.as_bytes())?;
|
||||||
let sha256 = sha256.to_string();
|
let sha256 = sha256.to_string();
|
||||||
Ok(Hashes { sha256 })
|
Ok(Hashes {
|
||||||
|
sha256: Some(sha256),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a [`File`] from an `<a>` tag.
|
/// Parse a [`File`] from an `<a>` tag.
|
||||||
|
@ -96,10 +98,12 @@ impl SimpleHtml {
|
||||||
.ok_or(Error::MissingHref)?;
|
.ok_or(Error::MissingHref)?;
|
||||||
let href = std::str::from_utf8(href.as_bytes())?;
|
let href = std::str::from_utf8(href.as_bytes())?;
|
||||||
|
|
||||||
// Split the base and the fragment.
|
let (path, hashes) = if let Some((path, fragment)) = href.split_once('#') {
|
||||||
let (path, fragment) = href
|
// Extract the hash, which should be in the fragment.
|
||||||
.split_once('#')
|
(path, Self::parse_hash(fragment)?)
|
||||||
.ok_or_else(|| Error::MissingHash(href.to_string()))?;
|
} else {
|
||||||
|
(href, Hashes::default())
|
||||||
|
};
|
||||||
|
|
||||||
// Extract the filename from the body text, which MUST match that of
|
// Extract the filename from the body text, which MUST match that of
|
||||||
// the final path component of the URL.
|
// the final path component of the URL.
|
||||||
|
@ -108,9 +112,6 @@ impl SimpleHtml {
|
||||||
.last()
|
.last()
|
||||||
.ok_or_else(|| Error::MissingFilename(href.to_string()))?;
|
.ok_or_else(|| Error::MissingFilename(href.to_string()))?;
|
||||||
|
|
||||||
// Extract the hash, which should be in the fragment.
|
|
||||||
let hashes = Self::parse_hash(fragment)?;
|
|
||||||
|
|
||||||
// Extract the `requires-python` field, which should be set on the
|
// Extract the `requires-python` field, which should be set on the
|
||||||
// `data-requires-python` attribute.
|
// `data-requires-python` attribute.
|
||||||
let requires_python = if let Some(requires_python) =
|
let requires_python = if let Some(requires_python) =
|
||||||
|
@ -234,7 +235,9 @@ mod tests {
|
||||||
dist_info_metadata: None,
|
dist_info_metadata: None,
|
||||||
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||||
hashes: Hashes {
|
hashes: Hashes {
|
||||||
sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
sha256: Some(
|
||||||
|
"6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
||||||
|
),
|
||||||
},
|
},
|
||||||
requires_python: None,
|
requires_python: None,
|
||||||
size: None,
|
size: None,
|
||||||
|
@ -288,7 +291,9 @@ mod tests {
|
||||||
dist_info_metadata: None,
|
dist_info_metadata: None,
|
||||||
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||||
hashes: Hashes {
|
hashes: Hashes {
|
||||||
sha256: "6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
sha256: Some(
|
||||||
|
"6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
|
||||||
|
),
|
||||||
},
|
},
|
||||||
requires_python: None,
|
requires_python: None,
|
||||||
size: None,
|
size: None,
|
||||||
|
@ -301,6 +306,57 @@ mod tests {
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_missing_hash() {
|
||||||
|
let text = r#"
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<h1>Links for jinja2</h1>
|
||||||
|
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
<!--TIMESTAMP 1703347410-->
|
||||||
|
"#;
|
||||||
|
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
||||||
|
let result = SimpleHtml::parse(text, &base).unwrap();
|
||||||
|
insta::assert_debug_snapshot!(result, @r###"
|
||||||
|
SimpleHtml {
|
||||||
|
base: BaseUrl(
|
||||||
|
Url {
|
||||||
|
scheme: "https",
|
||||||
|
cannot_be_a_base: false,
|
||||||
|
username: "",
|
||||||
|
password: None,
|
||||||
|
host: Some(
|
||||||
|
Domain(
|
||||||
|
"download.pytorch.org",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
port: None,
|
||||||
|
path: "/whl/jinja2/",
|
||||||
|
query: None,
|
||||||
|
fragment: None,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
files: [
|
||||||
|
File {
|
||||||
|
dist_info_metadata: None,
|
||||||
|
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||||
|
hashes: Hashes {
|
||||||
|
sha256: None,
|
||||||
|
},
|
||||||
|
requires_python: None,
|
||||||
|
size: None,
|
||||||
|
upload_time: None,
|
||||||
|
url: "/whl/Jinja2-3.1.2-py3-none-any.whl",
|
||||||
|
yanked: None,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_missing_href() {
|
fn parse_missing_href() {
|
||||||
let text = r#"
|
let text = r#"
|
||||||
|
@ -335,23 +391,6 @@ mod tests {
|
||||||
insta::assert_display_snapshot!(result, @"Missing href attribute on anchor link");
|
insta::assert_display_snapshot!(result, @"Missing href attribute on anchor link");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_missing_hash() {
|
|
||||||
let text = r#"
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html>
|
|
||||||
<body>
|
|
||||||
<h1>Links for jinja2</h1>
|
|
||||||
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
<!--TIMESTAMP 1703347410-->
|
|
||||||
"#;
|
|
||||||
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
|
||||||
let result = SimpleHtml::parse(text, &base).unwrap_err();
|
|
||||||
insta::assert_display_snapshot!(result, @"Missing hash attribute on URL: /whl/Jinja2-3.1.2-py3-none-any.whl");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_missing_hash_value() {
|
fn parse_missing_hash_value() {
|
||||||
let text = r#"
|
let text = r#"
|
||||||
|
|
|
@ -16,8 +16,8 @@ use zip::ZipArchive;
|
||||||
|
|
||||||
use distribution_filename::WheelFilename;
|
use distribution_filename::WheelFilename;
|
||||||
use distribution_types::{
|
use distribution_types::{
|
||||||
DirectArchiveUrl, DirectGitUrl, Dist, GitSourceDist, LocalEditable, Name, PathSourceDist,
|
DirectArchiveUrl, DirectGitUrl, Dist, GitSourceDist, Identifier, LocalEditable, Name,
|
||||||
RemoteSource, SourceDist,
|
PathSourceDist, RemoteSource, SourceDist,
|
||||||
};
|
};
|
||||||
use install_wheel_rs::read_dist_info;
|
use install_wheel_rs::read_dist_info;
|
||||||
use platform_tags::Tags;
|
use platform_tags::Tags;
|
||||||
|
@ -104,7 +104,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
CacheBucket::BuiltWheels,
|
CacheBucket::BuiltWheels,
|
||||||
WheelCache::Index(&registry_source_dist.index)
|
WheelCache::Index(&registry_source_dist.index)
|
||||||
.remote_wheel_dir(registry_source_dist.name.as_ref())
|
.remote_wheel_dir(registry_source_dist.name.as_ref())
|
||||||
.join(&registry_source_dist.file.hashes.sha256[..16]),
|
.join(&registry_source_dist.file.distribution_id().as_str()[..16]),
|
||||||
);
|
);
|
||||||
|
|
||||||
self.url(
|
self.url(
|
||||||
|
@ -160,7 +160,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
||||||
CacheBucket::BuiltWheels,
|
CacheBucket::BuiltWheels,
|
||||||
WheelCache::Index(&registry_source_dist.index)
|
WheelCache::Index(&registry_source_dist.index)
|
||||||
.remote_wheel_dir(registry_source_dist.name.as_ref())
|
.remote_wheel_dir(registry_source_dist.name.as_ref())
|
||||||
.join(&registry_source_dist.file.hashes.sha256[..16]),
|
.join(&registry_source_dist.file.distribution_id().as_str()[..16]),
|
||||||
);
|
);
|
||||||
|
|
||||||
self.url_metadata(
|
self.url_metadata(
|
||||||
|
|
|
@ -270,8 +270,10 @@ impl std::fmt::Display for DisplayResolutionGraph<'_> {
|
||||||
.filter(|hashes| !hashes.is_empty())
|
.filter(|hashes| !hashes.is_empty())
|
||||||
{
|
{
|
||||||
for hash in hashes {
|
for hash in hashes {
|
||||||
writeln!(f, " \\")?;
|
if let Some(hash) = hash.to_string() {
|
||||||
write!(f, " --hash={hash}")?;
|
writeln!(f, " \\")?;
|
||||||
|
write!(f, " --hash={hash}")?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,14 +85,18 @@ impl Yanked {
|
||||||
///
|
///
|
||||||
/// PEP 691 says multiple hashes can be included and the interpretation is left to the client, we
|
/// PEP 691 says multiple hashes can be included and the interpretation is left to the client, we
|
||||||
/// only support SHA 256 atm.
|
/// only support SHA 256 atm.
|
||||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default, Serialize, Deserialize)]
|
||||||
pub struct Hashes {
|
pub struct Hashes {
|
||||||
// TODO(charlie): Hashes should be optional.
|
pub sha256: Option<String>,
|
||||||
pub sha256: String,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for Hashes {
|
impl Hashes {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
/// Format as `<algorithm>:<hash>`.
|
||||||
write!(f, "sha256:{}", self.sha256)
|
///
|
||||||
|
/// Currently limited to SHA256.
|
||||||
|
pub fn to_string(&self) -> Option<String> {
|
||||||
|
self.sha256
|
||||||
|
.as_ref()
|
||||||
|
.map(|sha256| format!("sha256:{sha256}"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue