mirror of
https://github.com/astral-sh/uv.git
synced 2025-10-15 04:49:41 +00:00
Ignore non-hash fragments in HTML API responses (#11107)
## Summary

I'm not a fan of registries including fragments here that aren't hashes, but the spec doesn't expressly forbid it. I think it's reasonable to ignore them.

Specifically, the spec is here: https://packaging.python.org/en/latest/specifications/simple-repository-api/. It says that:

> The URL **SHOULD** include a hash in the form of a URL fragment with the following syntax: `#<hashname>=<hashvalue>`, where `<hashname>` is the lowercase name of the hash function (such as sha256) and `<hashvalue>` is the hex encoded digest.

But it doesn't mention other fragments.

Closes https://github.com/astral-sh/uv/issues/7257.
This commit is contained in:
parent
220821bc39
commit
a440735fac
1 changed file with 65 additions and 7 deletions
|
@ -2,12 +2,12 @@ use std::str::FromStr;
|
||||||
|
|
||||||
use jiff::Timestamp;
|
use jiff::Timestamp;
|
||||||
use tl::HTMLTag;
|
use tl::HTMLTag;
|
||||||
use tracing::{instrument, warn};
|
use tracing::{debug, instrument, warn};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use uv_pep440::VersionSpecifiers;
|
use uv_pep440::VersionSpecifiers;
|
||||||
use uv_pypi_types::LenientVersionSpecifiers;
|
|
||||||
use uv_pypi_types::{BaseUrl, CoreMetadata, File, Hashes, Yanked};
|
use uv_pypi_types::{BaseUrl, CoreMetadata, File, Hashes, Yanked};
|
||||||
|
use uv_pypi_types::{HashError, LenientVersionSpecifiers};
|
||||||
|
|
||||||
/// A parsed structure from PyPI "HTML" index format for a single package.
|
/// A parsed structure from PyPI "HTML" index format for a single package.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
@ -99,7 +99,24 @@ impl SimpleHtml {
|
||||||
if fragment.trim().is_empty() {
|
if fragment.trim().is_empty() {
|
||||||
Hashes::default()
|
Hashes::default()
|
||||||
} else {
|
} else {
|
||||||
Hashes::parse_fragment(&fragment)?
|
match Hashes::parse_fragment(&fragment) {
|
||||||
|
Ok(hashes) => hashes,
|
||||||
|
Err(
|
||||||
|
err
|
||||||
|
@ (HashError::InvalidFragment(..) | HashError::InvalidStructure(..)),
|
||||||
|
) => {
|
||||||
|
// If the URL includes an irrelevant hash (e.g., `#main`), ignore it.
|
||||||
|
debug!("{err}");
|
||||||
|
Hashes::default()
|
||||||
|
}
|
||||||
|
Err(
|
||||||
|
err
|
||||||
|
@ (HashError::UnsupportedHashAlgorithm(..) | HashError::NonUtf8(..)),
|
||||||
|
) => {
|
||||||
|
// If the URL references a hash, but it's unsupported, error.
|
||||||
|
return Err(err.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
|
@ -836,20 +853,61 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_missing_hash_value() {
|
fn parse_unknown_fragment() {
|
||||||
let text = r#"
|
let text = r#"
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html>
|
<html>
|
||||||
<body>
|
<body>
|
||||||
<h1>Links for jinja2</h1>
|
<h1>Links for jinja2</h1>
|
||||||
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl#sha256">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl#main">Jinja2-3.1.2-py3-none-any.whl</a><br/>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
<!--TIMESTAMP 1703347410-->
|
<!--TIMESTAMP 1703347410-->
|
||||||
"#;
|
"#;
|
||||||
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
|
||||||
let result = SimpleHtml::parse(text, &base).unwrap_err();
|
let result = SimpleHtml::parse(text, &base);
|
||||||
insta::assert_snapshot!(result, @"Unexpected fragment (expected `#sha256=...` or similar) on URL: sha256");
|
insta::assert_debug_snapshot!(result, @r###"
|
||||||
|
Ok(
|
||||||
|
SimpleHtml {
|
||||||
|
base: BaseUrl(
|
||||||
|
Url {
|
||||||
|
scheme: "https",
|
||||||
|
cannot_be_a_base: false,
|
||||||
|
username: "",
|
||||||
|
password: None,
|
||||||
|
host: Some(
|
||||||
|
Domain(
|
||||||
|
"download.pytorch.org",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
port: None,
|
||||||
|
path: "/whl/jinja2/",
|
||||||
|
query: None,
|
||||||
|
fragment: None,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
files: [
|
||||||
|
File {
|
||||||
|
core_metadata: None,
|
||||||
|
dist_info_metadata: None,
|
||||||
|
data_dist_info_metadata: None,
|
||||||
|
filename: "Jinja2-3.1.2-py3-none-any.whl",
|
||||||
|
hashes: Hashes {
|
||||||
|
md5: None,
|
||||||
|
sha256: None,
|
||||||
|
sha384: None,
|
||||||
|
sha512: None,
|
||||||
|
},
|
||||||
|
requires_python: None,
|
||||||
|
size: None,
|
||||||
|
upload_time: None,
|
||||||
|
url: "/whl/Jinja2-3.1.2-py3-none-any.whl#main",
|
||||||
|
yanked: None,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue