Read hash from URL fragment if --hashes are omitted (#6731)

## Summary

Like pip, if no `--hash` options are provided for a requirement but there's a
valid hash in the URL fragment (e.g., `#sha256=...`), we should respect it.

Closes https://github.com/astral-sh/uv/issues/6701.
This commit is contained in:
Charlie Marsh 2024-08-27 20:03:01 -04:00 committed by GitHub
parent b01c16a666
commit 8fdb3a882e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 265 additions and 84 deletions

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::fmt::{Display, Formatter};
use pep508_rs::{MarkerEnvironment, UnnamedRequirement};
use pypi_types::{Requirement, RequirementSource};
use pypi_types::{Hashes, ParsedUrl, Requirement, RequirementSource};
use uv_normalize::ExtraName;
use crate::VerbatimParsedUrl;
@ -82,6 +82,26 @@ impl UnresolvedRequirement {
Self::Unnamed(requirement) => requirement.url.is_editable(),
}
}
/// Return the hashes of the requirement, as specified in the URL fragment.
///
/// Returns `None` when the requirement is not URL-based (a named requirement whose source is
/// not [`RequirementSource::Url`], or an unnamed requirement whose URL is not a
/// [`ParsedUrl::Archive`]), when the URL has no fragment, or when the fragment cannot be
/// parsed by [`Hashes::parse_fragment`].
pub fn hashes(&self) -> Option<Hashes> {
    // Locate the URL fragment for whichever kind of requirement this is.
    let fragment = match self {
        Self::Named(requirement) => match &requirement.source {
            RequirementSource::Url { url, .. } => url.fragment(),
            _ => None,
        },
        Self::Unnamed(requirement) => match &requirement.url.parsed_url {
            ParsedUrl::Archive(archive) => archive.url.fragment(),
            _ => None,
        },
    }?;

    // A fragment that isn't a hash is treated the same as no hash at all.
    Hashes::parse_fragment(fragment).ok()
}
}
impl From<Requirement> for UnresolvedRequirementSpecification {

View file

@ -163,6 +163,68 @@ impl Hashes {
}
digests
}
/// Parse the hash from a fragment, as in: `sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`
///
/// The fragment must have the form `<algorithm>=<digest>`, with exactly one `=`. The digest is
/// stored verbatim in the field matching the algorithm; all other fields are left as `None`.
///
/// # Errors
///
/// Returns [`HashError::InvalidFragment`] if the fragment does not contain exactly one `=`, and
/// [`HashError::UnsupportedHashAlgorithm`] if the algorithm is not one of `md5`, `sha256`,
/// `sha384`, or `sha512`.
pub fn parse_fragment(fragment: &str) -> Result<Self, HashError> {
    // Extract the key and value, ensuring there are no more parts.
    let (name, value) = fragment
        .split_once('=')
        .filter(|(_, value)| !value.contains('='))
        .ok_or_else(|| HashError::InvalidFragment(fragment.to_string()))?;

    let mut hashes = Hashes {
        md5: None,
        sha256: None,
        sha384: None,
        sha512: None,
    };

    // Select the field that corresponds to the named algorithm.
    let slot = match name {
        "md5" => &mut hashes.md5,
        "sha256" => &mut hashes.sha256,
        "sha384" => &mut hashes.sha384,
        "sha512" => &mut hashes.sha512,
        _ => return Err(HashError::UnsupportedHashAlgorithm(fragment.to_string())),
    };

    // `value` is already a `&str`, so no UTF-8 validation is required before copying it.
    *slot = Some(value.into());
    Ok(hashes)
}
}
impl FromStr for Hashes {
@ -343,10 +405,16 @@ pub enum HashError {
#[error("Unexpected hash (expected `<algorithm>:<hash>`): {0}")]
InvalidStructure(String),
#[error("Unexpected fragment (expected `#sha256=...` or similar) on URL: {0}")]
InvalidFragment(String),
#[error(
"Unsupported hash algorithm: `{0}` (expected one of: `md5`, `sha256`, `sha384`, or `sha512`)"
"Unsupported hash algorithm (expected one of: `md5`, `sha256`, `sha384`, or `sha512`) on: `{0}`"
)]
UnsupportedHashAlgorithm(String),
#[error("Non-UTF-8 hash digest")]
NonUtf8(#[from] std::str::Utf8Error),
}
#[cfg(test)]

View file

@ -69,68 +69,6 @@ impl SimpleHtml {
Ok(Some(url))
}
/// Parse the hash from a fragment, as in: `sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`
fn parse_hash(fragment: &str) -> Result<Hashes, Error> {
let mut parts = fragment.split('=');
// Extract the key and value.
let name = parts
.next()
.ok_or_else(|| Error::FragmentParse(fragment.to_string()))?;
let value = parts
.next()
.ok_or_else(|| Error::FragmentParse(fragment.to_string()))?;
// Ensure there are no more parts.
if parts.next().is_some() {
return Err(Error::FragmentParse(fragment.to_string()));
}
match name {
"md5" => {
let md5 = std::str::from_utf8(value.as_bytes())?;
let md5 = md5.to_owned().into_boxed_str();
Ok(Hashes {
md5: Some(md5),
sha256: None,
sha384: None,
sha512: None,
})
}
"sha256" => {
let sha256 = std::str::from_utf8(value.as_bytes())?;
let sha256 = sha256.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: Some(sha256),
sha384: None,
sha512: None,
})
}
"sha384" => {
let sha384 = std::str::from_utf8(value.as_bytes())?;
let sha384 = sha384.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: Some(sha384),
sha512: None,
})
}
"sha512" => {
let sha512 = std::str::from_utf8(value.as_bytes())?;
let sha512 = sha512.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: None,
sha512: Some(sha512),
})
}
_ => Err(Error::UnsupportedHashAlgorithm(fragment.to_string())),
}
}
/// Parse a [`File`] from an `<a>` tag.
fn parse_anchor(link: &HTMLTag) -> Result<File, Error> {
// Extract the href.
@ -145,14 +83,13 @@ impl SimpleHtml {
// Extract the hash, which should be in the fragment.
let decoded = html_escape::decode_html_entities(href);
let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') {
let fragment = urlencoding::decode(fragment)
.map_err(|_| Error::FragmentParse(fragment.to_string()))?;
let fragment = urlencoding::decode(fragment)?;
(
path,
if fragment.trim().is_empty() {
Hashes::default()
} else {
Self::parse_hash(&fragment)?
Hashes::parse_fragment(&fragment)?
},
)
} else {
@ -199,7 +136,7 @@ impl SimpleHtml {
match dist_info_metadata.as_ref() {
"true" => Some(CoreMetadata::Bool(true)),
"false" => Some(CoreMetadata::Bool(false)),
fragment => Some(CoreMetadata::Hashes(Self::parse_hash(fragment)?)),
fragment => Some(CoreMetadata::Hashes(Hashes::parse_fragment(fragment)?)),
}
} else {
None
@ -235,6 +172,9 @@ pub enum Error {
#[error(transparent)]
Utf8(#[from] std::str::Utf8Error),
#[error(transparent)]
FromUtf8(#[from] std::string::FromUtf8Error),
#[error("Failed to parse URL: {0}")]
UrlParse(String, #[source] url::ParseError),
@ -253,13 +193,8 @@ pub enum Error {
#[error("Missing hash attribute on URL: {0}")]
MissingHash(String),
#[error("Unexpected fragment (expected `#sha256=...` or similar) on URL: {0}")]
FragmentParse(String),
#[error(
"Unsupported hash algorithm (expected `md5`, `sha256`, `sha384`, or `sha512`) on: {0}"
)]
UnsupportedHashAlgorithm(String),
#[error(transparent)]
FragmentParse(#[from] pypi_types::HashError),
#[error("Invalid `requires-python` specifier: {0}")]
Pep440(#[source] pep440_rs::VersionSpecifiersParseError),
@ -851,7 +786,7 @@ mod tests {
"#;
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
let result = SimpleHtml::parse(text, &base).unwrap_err();
insta::assert_snapshot!(result, @"Unsupported hash algorithm (expected `md5`, `sha256`, `sha384`, or `sha512`) on: blake2=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61");
insta::assert_snapshot!(result, @"Unsupported hash algorithm (expected one of: `md5`, `sha256`, `sha384`, or `sha512`) on: `blake2=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`");
}
#[test]

View file

@ -8,7 +8,7 @@ use distribution_types::{
};
use pep440_rs::Version;
use pypi_types::{
HashDigest, HashError, Requirement, RequirementSource, ResolverMarkerEnvironment,
HashDigest, HashError, Hashes, Requirement, RequirementSource, ResolverMarkerEnvironment,
};
use uv_configuration::HashCheckingMode;
use uv_normalize::PackageName;
@ -153,6 +153,21 @@ impl HashStrategy {
}
};
let digests = if digests.is_empty() {
// If there are no hashes, and the distribution is URL-based, attempt to extract
// it from the fragment.
requirement
.hashes()
.map(Hashes::into_digests)
.unwrap_or_default()
} else {
// Parse the hashes.
digests
.iter()
.map(|digest| HashDigest::from_str(digest))
.collect::<Result<Vec<_>, _>>()?
};
if digests.is_empty() {
// Under `--require-hashes`, every requirement must include a hash.
if mode.is_require() {
@ -164,12 +179,6 @@ impl HashStrategy {
continue;
}
// Parse the hashes.
let digests = digests
.iter()
.map(|digest| HashDigest::from_str(digest))
.collect::<Result<Vec<_>, _>>()?;
hashes.insert(id, digests);
}

View file

@ -3407,7 +3407,7 @@ fn require_hashes_unknown_algorithm() -> Result<()> {
----- stdout -----
----- stderr -----
error: Unsupported hash algorithm: `foo` (expected one of: `md5`, `sha256`, `sha384`, or `sha512`)
error: Unsupported hash algorithm (expected one of: `md5`, `sha256`, `sha384`, or `sha512`) on: `foo`
"###
);
@ -5006,6 +5006,155 @@ fn require_hashes_registry_invalid_hash() -> Result<()> {
Ok(())
}
/// Include the hash in the URL directly.
///
/// The named requirement carries a `#sha256=...` fragment on its direct URL and no `--hash`
/// option; under `--require-hashes`, the sync is expected to succeed.
#[test]
fn require_hashes_url() -> Result<()> {
let context = TestContext::new("3.12");
// A named direct-URL requirement whose only hash is the URL fragment.
let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt
.write_str("iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374")?;
uv_snapshot!(context.pip_sync()
.env_remove("UV_EXCLUDE_NEWER")
.arg("requirements.txt")
.arg("--require-hashes"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Installed 1 package in [TIME]
+ iniconfig==2.0.0 (from https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374)
"###
);
Ok(())
}
/// Include an irrelevant fragment in the URL.
///
/// The fragment (`#foo=bar`) is not a hash, and no `--hash` option is given; under
/// `--require-hashes`, the sync is expected to fail because no hash was provided.
#[test]
fn require_hashes_url_other_fragment() -> Result<()> {
let context = TestContext::new("3.12");
// A named direct-URL requirement whose fragment does not name a hash algorithm.
let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt
.write_str("iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#foo=bar")?;
uv_snapshot!(context.pip_sync()
.env_remove("UV_EXCLUDE_NEWER")
.arg("requirements.txt")
.arg("--require-hashes"), @r###"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
error: In `--require-hashes` mode, all requirement must have a hash, but none were provided for: iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#foo=bar
"###
);
Ok(())
}
/// Include an invalid hash in the URL directly.
///
/// The `sha256` digest in the fragment (`c6a8...`) differs from the digest computed for the
/// wheel (`b6a8...`), so the sync is expected to fail with a hash mismatch.
#[test]
fn require_hashes_url_invalid() -> Result<()> {
let context = TestContext::new("3.12");
// A named direct-URL requirement whose fragment digest does not match the wheel.
let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt
.write_str("iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=c6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374")?;
uv_snapshot!(context.pip_sync()
.env_remove("UV_EXCLUDE_NEWER")
.arg("requirements.txt")
.arg("--require-hashes"), @r###"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
error: Failed to prepare distributions
Caused by: Failed to fetch wheel: iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=c6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
Caused by: Hash mismatch for `iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=c6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374`
Expected:
sha256:c6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
Computed:
sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
"###
);
Ok(())
}
/// Ignore the (valid) hash on the fragment if (invalid) hashes are provided directly.
///
/// The fragment digest (`b6a8...`) matches the wheel, but the explicit `--hash` digest
/// (`c6a8...`) does not. The expected mismatch error reports the `--hash` value as the expected
/// digest, showing that the explicit hash is used rather than the fragment.
#[test]
fn require_hashes_url_ignore() -> Result<()> {
let context = TestContext::new("3.12");
// A named direct-URL requirement with a matching fragment hash and a non-matching `--hash`.
let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt
.write_str("iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 --hash sha256:c6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374")?;
uv_snapshot!(context.pip_sync()
.env_remove("UV_EXCLUDE_NEWER")
.arg("requirements.txt")
.arg("--require-hashes"), @r###"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
error: Failed to prepare distributions
Caused by: Failed to fetch wheel: iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
Caused by: Hash mismatch for `iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374`
Expected:
sha256:c6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
Computed:
sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
"###
);
Ok(())
}
/// Include the hash in the URL directly, for an unnamed requirement.
///
/// The requirement is a bare URL (no `name @` prefix) with a `#sha256=...` fragment and no
/// `--hash` option; under `--require-hashes`, the sync is expected to succeed.
#[test]
fn require_hashes_url_unnamed() -> Result<()> {
let context = TestContext::new("3.12");
// An unnamed direct-URL requirement whose only hash is the URL fragment.
let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt
.write_str("https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374")?;
uv_snapshot!(context.pip_sync()
.env_remove("UV_EXCLUDE_NEWER")
.arg("requirements.txt")
.arg("--require-hashes"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Installed 1 package in [TIME]
+ iniconfig==2.0.0 (from https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374)
"###
);
Ok(())
}
/// Sync to a `--target` directory with a built distribution.
#[test]
fn target_built_distribution() -> Result<()> {