From 963f2a778bfc410966c7cb1944ca3ba6170cdcbb Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 18 May 2024 22:19:55 -0400 Subject: [PATCH] URL-decode hashes in HTML fragments (#3655) ## Summary Closes https://github.com/astral-sh/uv/issues/3654 --- crates/uv-client/src/html.rs | 60 +++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs index 5d315c6d4..8ad5cc420 100644 --- a/crates/uv-client/src/html.rs +++ b/crates/uv-client/src/html.rs @@ -145,12 +145,14 @@ impl SimpleHtml { // Extract the hash, which should be in the fragment. let decoded = html_escape::decode_html_entities(href); let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') { + let fragment = urlencoding::decode(fragment) + .map_err(|_| Error::FragmentParse(fragment.to_string()))?; ( path, if fragment.trim().is_empty() { Hashes::default() } else { - Self::parse_hash(fragment)? + Self::parse_hash(&fragment)? }, ) } else { @@ -488,6 +490,62 @@ mod tests { "###); } + #[test] + fn parse_encoded_fragment() { + let text = r#" + + + +

Links for jinja2

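+ <a href="/whl/Jinja2-3.1.2-py3-none-any.whl#sha256%3D4095ada29e51070f7d199a0a5bdf5c8d8e238e03f0bf4dcc02571e78c9ae800d">Jinja2-3.1.2-py3-none-any.whl</a>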
+ + + + "#; + let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap(); + let result = SimpleHtml::parse(text, &base).unwrap(); + insta::assert_debug_snapshot!(result, @r###" + SimpleHtml { + base: BaseUrl( + Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "download.pytorch.org", + ), + ), + port: None, + path: "/whl/jinja2/", + query: None, + fragment: None, + }, + ), + files: [ + File { + dist_info_metadata: None, + filename: "Jinja2-3.1.2-py3-none-any.whl", + hashes: Hashes { + md5: None, + sha256: Some( + "4095ada29e51070f7d199a0a5bdf5c8d8e238e03f0bf4dcc02571e78c9ae800d", + ), + sha384: None, + sha512: None, + }, + requires_python: None, + size: None, + upload_time: None, + url: "/whl/Jinja2-3.1.2-py3-none-any.whl#sha256%3D4095ada29e51070f7d199a0a5bdf5c8d8e238e03f0bf4dcc02571e78c9ae800d", + yanked: None, + }, + ], + } + "###); + } + #[test] fn parse_quoted_filepath() { let text = r#"