mirror of
https://github.com/astral-sh/uv.git
synced 2025-10-14 20:39:37 +00:00
Ignore non-hash fragments in HTML API responses (#11107)
## Summary I'm not a fan of registries including fragments here that aren't hashes, but the spec doesn't expressly forbid it. I think it's reasonable to ignore them. Specifically, the spec is here: https://packaging.python.org/en/latest/specifications/simple-repository-api/. It says that: > The URL **SHOULD** include a hash in the form of a URL fragment with the following syntax: `#<hashname>=<hashvalue>`, where `<hashname>`he lowercase name of the hash function (such as sha256) and `<hashvalue>` is the hex encoded digest. But it doesn't mention other fragments. Closes https://github.com/astral-sh/uv/issues/7257.
This commit is contained in:
parent
220821bc39
commit
a440735fac
1 changed files with 65 additions and 7 deletions
|
@ -2,12 +2,12 @@ use std::str::FromStr;
|
|||
|
||||
use jiff::Timestamp;
|
||||
use tl::HTMLTag;
|
||||
use tracing::{instrument, warn};
|
||||
use tracing::{debug, instrument, warn};
|
||||
use url::Url;
|
||||
|
||||
use uv_pep440::VersionSpecifiers;
|
||||
use uv_pypi_types::LenientVersionSpecifiers;
|
||||
use uv_pypi_types::{BaseUrl, CoreMetadata, File, Hashes, Yanked};
|
||||
use uv_pypi_types::{HashError, LenientVersionSpecifiers};
|
||||
|
||||
/// A parsed structure from PyPI "HTML" index format for a single package.
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -99,7 +99,24 @@ impl SimpleHtml {
|
|||
if fragment.trim().is_empty() {
|
||||
Hashes::default()
|
||||
} else {
|
||||
Hashes::parse_fragment(&fragment)?
|
||||
match Hashes::parse_fragment(&fragment) {
|
||||
Ok(hashes) => hashes,
|
||||
Err(
|
||||
err
|
||||
@ (HashError::InvalidFragment(..) | HashError::InvalidStructure(..)),
|
||||
) => {
|
||||
// If the URL includes an irrelevant hash (e.g., `#main`), ignore it.
|
||||
debug!("{err}");
|
||||
Hashes::default()
|
||||
}
|
||||
Err(
|
||||
err
|
||||
@ (HashError::UnsupportedHashAlgorithm(..) | HashError::NonUtf8(..)),
|
||||
) => {
|
||||
// If the URL references a hash, but it's unsupported, error.
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
} else {
|
||||
|
@ -836,20 +853,61 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn parse_missing_hash_value() {
|
||||
fn parse_unknown_fragment() {
|
||||
let text = r#"
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<h1>Links for jinja2</h1>
|
||||
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl#sha256">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
||||
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl#main">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
||||
</body>
|
||||
</html>
|
||||
<!--TIMESTAMP 1703347410-->
|
||||
"#;
|
||||
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
||||
let result = SimpleHtml::parse(text, &base).unwrap_err();
|
||||
insta::assert_snapshot!(result, @"Unexpected fragment (expected `#sha256=...` or similar) on URL: sha256");
|
||||
let result = SimpleHtml::parse(text, &base);
|
||||
insta::assert_debug_snapshot!(result, @r###"
|
||||
Ok(
|
||||
SimpleHtml {
|
||||
base: BaseUrl(
|
||||
Url {
|
||||
scheme: "https",
|
||||
cannot_be_a_base: false,
|
||||
username: "",
|
||||
password: None,
|
||||
host: Some(
|
||||
Domain(
|
||||
"download.pytorch.org",
|
||||
),
|
||||
),
|
||||
port: None,
|
||||
path: "/whl/jinja2/",
|
||||
query: None,
|
||||
fragment: None,
|
||||
},
|
||||
),
|
||||
files: [
|
||||
File {
|
||||
core_metadata: None,
|
||||
dist_info_metadata: None,
|
||||
data_dist_info_metadata: None,
|
||||
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||
hashes: Hashes {
|
||||
md5: None,
|
||||
sha256: None,
|
||||
sha384: None,
|
||||
sha512: None,
|
||||
},
|
||||
requires_python: None,
|
||||
size: None,
|
||||
upload_time: None,
|
||||
url: "/whl/Jinja2-3.1.2-py3-none-any.whl#main",
|
||||
yanked: None,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue