From ae1edef9c0a4906cd22838b30fd719723c6cb67e Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Wed, 12 Nov 2025 11:27:57 -0500 Subject: [PATCH] Reject ambiguously parsed URLs (#16622) Co-authored-by: Zanie Blue --- Cargo.lock | 2 + crates/uv-auth/src/middleware.rs | 18 +-- crates/uv-auth/src/pyx.rs | 4 +- crates/uv-auth/src/service.rs | 6 +- crates/uv-bin-install/Cargo.toml | 2 +- crates/uv-bin-install/src/lib.rs | 3 +- crates/uv-cache-key/src/canonical_url.rs | 12 +- crates/uv-client/src/base_client.rs | 7 +- crates/uv-client/src/cached_client.rs | 4 +- crates/uv-client/src/flat_index.rs | 2 +- crates/uv-client/src/html.rs | 4 +- crates/uv-client/src/middleware.rs | 2 +- crates/uv-client/src/registry_client.rs | 4 +- crates/uv-distribution-types/src/file.rs | 8 +- .../uv-distribution-types/src/requirement.rs | 4 +- .../uv-distribution/src/metadata/lowering.rs | 4 +- crates/uv-pep508/src/unnamed.rs | 2 +- crates/uv-pep508/src/verbatim_url.rs | 41 +++++- crates/uv-publish/src/trusted_publishing.rs | 4 +- crates/uv-pypi-types/src/direct_url.rs | 4 +- crates/uv-pypi-types/src/parsed_url.rs | 6 +- crates/uv-python/src/downloads.rs | 4 +- crates/uv-redacted/Cargo.toml | 1 + crates/uv-redacted/src/lib.rs | 120 ++++++++++++++---- crates/uv-requirements-txt/src/lib.rs | 31 +++-- crates/uv-requirements/src/source_tree.rs | 2 +- .../src/lock/export/pylock_toml.rs | 2 +- crates/uv-resolver/src/lock/mod.rs | 4 +- crates/uv-resolver/src/redirect.rs | 4 +- crates/uv/tests/it/sync.rs | 43 +++++++ 30 files changed, 257 insertions(+), 97 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae38cdaa3..20f1a93d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5463,6 +5463,7 @@ dependencies = [ "uv-extract", "uv-pep440", "uv-platform", + "uv-redacted", ] [[package]] @@ -6437,6 +6438,7 @@ dependencies = [ "ref-cast", "schemars", "serde", + "thiserror 2.0.17", "url", ] diff --git a/crates/uv-auth/src/middleware.rs b/crates/uv-auth/src/middleware.rs index 82b5b5b58..e57f7bf8d 100644 --- a/crates/uv-auth/src/middleware.rs +++ b/crates/uv-auth/src/middleware.rs @@ -502,7 +502,7 @@ impl AuthMiddleware { // Nothing to insert into the cache if we don't have credentials return next.run(request, extensions).await; }; - let url = DisplaySafeUrl::from(request.url().clone()); + let url = DisplaySafeUrl::from_url(request.url().clone()); if matches!(auth_policy, AuthPolicy::Always) && credentials.password().is_none() { return Err(Error::Middleware(format_err!("Missing password for {url}"))); } @@ -801,7 +801,7 @@ impl AuthMiddleware { } fn tracing_url(request: &Request, credentials: Option<&Authentication>) -> DisplaySafeUrl { - let mut url = DisplaySafeUrl::from(request.url().clone()); + let mut url = DisplaySafeUrl::from_url(request.url().clone()); if let Some(Authentication::Credentials(creds)) = credentials { if let Some(username) = creds.username() { let _ = url.set_username(username); @@ -1990,13 +1990,13 @@ mod tests { let base_url_2 = base_url.join("prefix_2")?; let indexes = Indexes::from_indexes(vec![ Index { - url: DisplaySafeUrl::from(base_url_1.clone()), - root_url: DisplaySafeUrl::from(base_url_1.clone()), + url: DisplaySafeUrl::from_url(base_url_1.clone()), + root_url: DisplaySafeUrl::from_url(base_url_1.clone()), auth_policy: AuthPolicy::Auto, }, Index { - url: DisplaySafeUrl::from(base_url_2.clone()), - root_url: DisplaySafeUrl::from(base_url_2.clone()), + url: DisplaySafeUrl::from_url(base_url_2.clone()), + root_url: DisplaySafeUrl::from_url(base_url_2.clone()), auth_policy: AuthPolicy::Auto, }, ]); @@ -2098,8 +2098,8 @@ mod tests { let base_url = Url::parse(&server.uri())?; let index_url = base_url.join("prefix_1")?; let indexes = Indexes::from_indexes(vec![Index { - url: DisplaySafeUrl::from(index_url.clone()), - root_url: DisplaySafeUrl::from(index_url.clone()), + url: DisplaySafeUrl::from_url(index_url.clone()), + root_url: DisplaySafeUrl::from_url(index_url.clone()), auth_policy: AuthPolicy::Auto, }]); @@ -2153,7 +2153,7 @@ mod tests { } fn indexes_for(url: &Url, policy: AuthPolicy) -> Indexes { - let mut url = DisplaySafeUrl::from(url.clone()); + let mut url = DisplaySafeUrl::from_url(url.clone()); url.set_password(None).ok(); url.set_username("").ok(); Indexes::from_indexes(vec![Index { diff --git a/crates/uv-auth/src/pyx.rs b/crates/uv-auth/src/pyx.rs index c670f410d..1eb3e7407 100644 --- a/crates/uv-auth/src/pyx.rs +++ b/crates/uv-auth/src/pyx.rs @@ -10,7 +10,7 @@ use tracing::debug; use url::Url; use uv_cache_key::CanonicalUrl; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use uv_small_str::SmallString; use uv_state::{StateBucket, StateStore}; use uv_static::EnvVars; @@ -473,7 +473,7 @@ impl PyxTokenStore { #[derive(thiserror::Error, Debug)] pub enum TokenStoreError { #[error(transparent)] - Url(#[from] url::ParseError), + Url(#[from] DisplaySafeUrlError), #[error(transparent)] Io(#[from] io::Error), #[error(transparent)] diff --git a/crates/uv-auth/src/service.rs b/crates/uv-auth/src/service.rs index 7ae012c55..7788924e0 100644 --- a/crates/uv-auth/src/service.rs +++ b/crates/uv-auth/src/service.rs @@ -2,12 +2,12 @@ use serde::{Deserialize, Serialize}; use std::str::FromStr; use thiserror::Error; use url::Url; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; #[derive(Error, Debug)] pub enum ServiceParseError { #[error(transparent)] - InvalidUrl(#[from] url::ParseError), + InvalidUrl(#[from] DisplaySafeUrlError), #[error("Unsupported scheme: {0}")] UnsupportedScheme(String), #[error("HTTPS is required for non-local hosts")] @@ -51,7 +51,7 @@ impl FromStr for Service { // First try parsing as-is let url = match DisplaySafeUrl::parse(s) { Ok(url) => url, - Err(url::ParseError::RelativeUrlWithoutBase) => { + Err(DisplaySafeUrlError::Url(url::ParseError::RelativeUrlWithoutBase)) => { // If it's a relative URL, try prepending https:// let with_https = format!("https://{s}"); DisplaySafeUrl::parse(&with_https)? diff --git a/crates/uv-bin-install/Cargo.toml b/crates/uv-bin-install/Cargo.toml index 6de52683a..9a07b1fc2 100644 --- a/crates/uv-bin-install/Cargo.toml +++ b/crates/uv-bin-install/Cargo.toml @@ -23,6 +23,7 @@ uv-distribution-filename = { workspace = true } uv-extract = { workspace = true } uv-pep440 = { workspace = true } uv-platform = { workspace = true } +uv-redacted = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } reqwest = { workspace = true } @@ -34,4 +35,3 @@ tokio = { workspace = true } tokio-util = { workspace = true } tracing = { workspace = true } url = { workspace = true } - diff --git a/crates/uv-bin-install/src/lib.rs b/crates/uv-bin-install/src/lib.rs index a9ab3c377..0e3264f38 100644 --- a/crates/uv-bin-install/src/lib.rs +++ b/crates/uv-bin-install/src/lib.rs @@ -24,6 +24,7 @@ use uv_client::{BaseClient, is_transient_network_error}; use uv_extract::{Error as ExtractError, stream}; use uv_pep440::Version; use uv_platform::Platform; +use uv_redacted::DisplaySafeUrl; /// Binary tools that can be installed. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -311,7 +312,7 @@ async fn download_and_unpack( let temp_dir = tempfile::tempdir_in(cache.bucket(CacheBucket::Binaries))?; let response = client - .for_host(&download_url.clone().into()) + .for_host(&DisplaySafeUrl::from_url(download_url.clone())) .get(download_url.clone()) .send() .await diff --git a/crates/uv-cache-key/src/canonical_url.rs b/crates/uv-cache-key/src/canonical_url.rs index 19f5a3d7c..333f4cc45 100644 --- a/crates/uv-cache-key/src/canonical_url.rs +++ b/crates/uv-cache-key/src/canonical_url.rs @@ -4,7 +4,7 @@ use std::hash::{Hash, Hasher}; use std::ops::Deref; use url::Url; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use crate::cache_key::{CacheKey, CacheKeyHasher}; @@ -98,7 +98,7 @@ impl CanonicalUrl { Self(url) } - pub fn parse(url: &str) -> Result { + pub fn parse(url: &str) -> Result { Ok(Self::new(&DisplaySafeUrl::parse(url)?)) } } @@ -164,7 +164,7 @@ impl RepositoryUrl { Self(url) } - pub fn parse(url: &str) -> Result { + pub fn parse(url: &str) -> Result { Ok(Self::new(&DisplaySafeUrl::parse(url)?)) } } @@ -204,7 +204,7 @@ mod tests { use super::*; #[test] - fn user_credential_does_not_affect_cache_key() -> Result<(), url::ParseError> { + fn user_credential_does_not_affect_cache_key() -> Result<(), DisplaySafeUrlError> { let mut hasher = CacheKeyHasher::new(); CanonicalUrl::parse("https://example.com/pypa/sample-namespace-packages.git@2.0.0")? .cache_key(&mut hasher); @@ -254,7 +254,7 @@ mod tests { } #[test] - fn canonical_url() -> Result<(), url::ParseError> { + fn canonical_url() -> Result<(), DisplaySafeUrlError> { // Two URLs should be considered equal regardless of the `.git` suffix. assert_eq!( CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?, @@ -335,7 +335,7 @@ mod tests { } #[test] - fn repository_url() -> Result<(), url::ParseError> { + fn repository_url() -> Result<(), DisplaySafeUrlError> { // Two URLs should be considered equal regardless of the `.git` suffix. assert_eq!( RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?, diff --git a/crates/uv-client/src/base_client.rs b/crates/uv-client/src/base_client.rs index 0c897957d..a834e5e75 100644 --- a/crates/uv-client/src/base_client.rs +++ b/crates/uv-client/src/base_client.rs @@ -35,6 +35,7 @@ use uv_pep508::MarkerEnvironment; use uv_platform_tags::Platform; use uv_preview::Preview; use uv_redacted::DisplaySafeUrl; +use uv_redacted::DisplaySafeUrlError; use uv_static::EnvVars; use uv_version::version; use uv_warnings::warn_user_once; @@ -577,7 +578,7 @@ impl BaseClient { /// Executes a request, applying redirect policy. pub async fn execute(&self, req: Request) -> reqwest_middleware::Result { - let client = self.for_host(&DisplaySafeUrl::from(req.url().clone())); + let client = self.for_host(&DisplaySafeUrl::from_url(req.url().clone())); client.execute(req).await } @@ -707,7 +708,7 @@ fn request_into_redirect( res: &Response, cross_origin_credentials_policy: CrossOriginCredentialsPolicy, ) -> reqwest_middleware::Result> { - let original_req_url = DisplaySafeUrl::from(req.url().clone()); + let original_req_url = DisplaySafeUrl::from_url(req.url().clone()); let status = res.status(); let should_redirect = match status { StatusCode::MOVED_PERMANENTLY @@ -760,7 +761,7 @@ fn request_into_redirect( let mut redirect_url = match DisplaySafeUrl::parse(location) { Ok(url) => url, // Per RFC 7231, URLs should be resolved against the request URL. - Err(ParseError::RelativeUrlWithoutBase) => original_req_url.join(location).map_err(|err| { + Err(DisplaySafeUrlError::Url(ParseError::RelativeUrlWithoutBase)) => original_req_url.join(location).map_err(|err| { reqwest_middleware::Error::Middleware(anyhow!( "Invalid HTTP {status} 'Location' value `{location}` relative to `{original_req_url}`: {err}" )) diff --git a/crates/uv-client/src/cached_client.rs b/crates/uv-client/src/cached_client.rs index 6c31a6515..d91cc717a 100644 --- a/crates/uv-client/src/cached_client.rs +++ b/crates/uv-client/src/cached_client.rs @@ -557,7 +557,7 @@ impl CachedClient { cached: DataWithCachePolicy, new_cache_policy_builder: CachePolicyBuilder, ) -> Result { - let url = DisplaySafeUrl::from(req.url().clone()); + let url = DisplaySafeUrl::from_url(req.url().clone()); debug!("Sending revalidation request for: {url}"); let mut response = self .0 @@ -627,7 +627,7 @@ impl CachedClient { req: Request, cache_control: CacheControl<'_>, ) -> Result<(Response, Option>), Error> { - let url = DisplaySafeUrl::from(req.url().clone()); + let url = DisplaySafeUrl::from_url(req.url().clone()); trace!("Sending fresh {} request for {}", req.method(), url); let cache_policy_builder = CachePolicyBuilder::new(&req); let mut response = self diff --git a/crates/uv-client/src/flat_index.rs b/crates/uv-client/src/flat_index.rs index 217ab4d9d..5e898f38c 100644 --- a/crates/uv-client/src/flat_index.rs +++ b/crates/uv-client/src/flat_index.rs @@ -189,7 +189,7 @@ impl<'a> FlatIndexClient<'a> { async { // Use the response URL, rather than the request URL, as the base for relative URLs. // This ensures that we handle redirects and other URL transformations correctly. - let url = DisplaySafeUrl::from(response.url().clone()); + let url = DisplaySafeUrl::from_url(response.url().clone()); let text = response .text() diff --git a/crates/uv-client/src/html.rs b/crates/uv-client/src/html.rs index 1233096e6..f94d63b70 100644 --- a/crates/uv-client/src/html.rs +++ b/crates/uv-client/src/html.rs @@ -8,7 +8,7 @@ use uv_normalize::PackageName; use uv_pep440::VersionSpecifiers; use uv_pypi_types::{BaseUrl, CoreMetadata, Hashes, PypiFile, Yanked}; use uv_pypi_types::{HashError, LenientVersionSpecifiers}; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; /// A parsed structure from PyPI "HTML" index format for a single package. #[derive(Debug, Clone)] @@ -285,7 +285,7 @@ pub enum Error { FromUtf8(#[from] std::string::FromUtf8Error), #[error("Failed to parse URL: {0}")] - UrlParse(String, #[source] url::ParseError), + UrlParse(String, #[source] DisplaySafeUrlError), #[error(transparent)] HtmlParse(#[from] tl::ParseError), diff --git a/crates/uv-client/src/middleware.rs b/crates/uv-client/src/middleware.rs index f3a899b73..55a6272fa 100644 --- a/crates/uv-client/src/middleware.rs +++ b/crates/uv-client/src/middleware.rs @@ -43,7 +43,7 @@ impl Middleware for OfflineMiddleware { ) -> reqwest_middleware::Result { Err(reqwest_middleware::Error::Middleware( OfflineError { - url: DisplaySafeUrl::from(req.url().clone()), + url: DisplaySafeUrl::from_url(req.url().clone()), } .into(), )) diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs index e846ab640..79b6025f2 100644 --- a/crates/uv-client/src/registry_client.rs +++ b/crates/uv-client/src/registry_client.rs @@ -587,7 +587,7 @@ impl RegistryClient { async { // Use the response URL, rather than the request URL, as the base for relative URLs. // This ensures that we handle redirects and other URL transformations correctly. - let url = DisplaySafeUrl::from(response.url().clone()); + let url = DisplaySafeUrl::from_url(response.url().clone()); let content_type = response .headers() @@ -766,7 +766,7 @@ impl RegistryClient { async { // Use the response URL, rather than the request URL, as the base for relative URLs. // This ensures that we handle redirects and other URL transformations correctly. - let url = DisplaySafeUrl::from(response.url().clone()); + let url = DisplaySafeUrl::from_url(response.url().clone()); let content_type = response .headers() diff --git a/crates/uv-distribution-types/src/file.rs b/crates/uv-distribution-types/src/file.rs index cf014a14a..581d67337 100644 --- a/crates/uv-distribution-types/src/file.rs +++ b/crates/uv-distribution-types/src/file.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError}; use uv_pep508::split_scheme; use uv_pypi_types::{CoreMetadata, HashDigests, Yanked}; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use uv_small_str::SmallString; /// Error converting [`uv_pypi_types::PypiFile`] to [`distribution_type::File`]. @@ -272,7 +272,7 @@ pub enum ToUrlError { base: String, /// The underlying URL parse error. #[source] - err: url::ParseError, + err: DisplaySafeUrlError, }, /// An error that occurs when the base URL could not be joined with /// the relative path in a [`FileLocation::Relative`]. @@ -284,7 +284,7 @@ pub enum ToUrlError { path: String, /// The underlying URL parse error. #[source] - err: url::ParseError, + err: DisplaySafeUrlError, }, /// An error that occurs when the absolute URL in [`FileLocation::Absolute`] /// could not be parsed as a valid URL. @@ -294,7 +294,7 @@ pub enum ToUrlError { absolute: String, /// The underlying URL parse error. #[source] - err: url::ParseError, + err: DisplaySafeUrlError, }, } diff --git a/crates/uv-distribution-types/src/requirement.rs b/crates/uv-distribution-types/src/requirement.rs index 25676d999..b64c7df7a 100644 --- a/crates/uv-distribution-types/src/requirement.rs +++ b/crates/uv-distribution-types/src/requirement.rs @@ -13,7 +13,7 @@ use uv_pep440::VersionSpecifiers; use uv_pep508::{ MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, VersionOrUrl, marker, }; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use crate::{IndexMetadata, IndexUrl}; @@ -29,7 +29,7 @@ pub enum RequirementError { #[error(transparent)] ParsedUrlError(#[from] ParsedUrlError), #[error(transparent)] - UrlParseError(#[from] url::ParseError), + UrlParseError(#[from] DisplaySafeUrlError), #[error(transparent)] OidParseError(#[from] OidParseError), #[error(transparent)] diff --git a/crates/uv-distribution/src/metadata/lowering.rs b/crates/uv-distribution/src/metadata/lowering.rs index 627e2d4dd..152f68030 100644 --- a/crates/uv-distribution/src/metadata/lowering.rs +++ b/crates/uv-distribution/src/metadata/lowering.rs @@ -14,7 +14,7 @@ use uv_normalize::{ExtraName, GroupName, PackageName}; use uv_pep440::VersionSpecifiers; use uv_pep508::{MarkerTree, VerbatimUrl, VersionOrUrl, looks_like_git_repository}; use uv_pypi_types::{ConflictItem, ParsedGitUrl, ParsedUrlError, VerbatimParsedUrl}; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use uv_workspace::Workspace; use uv_workspace::pyproject::{PyProjectToml, Source, Sources}; @@ -527,7 +527,7 @@ pub enum LoweringError { #[error("Workspace members are not allowed in non-workspace contexts")] WorkspaceMember, #[error(transparent)] - InvalidUrl(#[from] url::ParseError), + InvalidUrl(#[from] DisplaySafeUrlError), #[error(transparent)] InvalidVerbatimUrl(#[from] uv_pep508::VerbatimUrlError), #[error("Fragments are not allowed in URLs: `{0}`")] diff --git a/crates/uv-pep508/src/unnamed.rs b/crates/uv-pep508/src/unnamed.rs index d5c1820bb..9dbd854de 100644 --- a/crates/uv-pep508/src/unnamed.rs +++ b/crates/uv-pep508/src/unnamed.rs @@ -48,7 +48,7 @@ impl UnnamedRequirementUrl for VerbatimUrl { } fn parse_unnamed_url(given: impl AsRef) -> Result { - Ok(Self::parse_url(given)?) + Self::parse_url(given) } fn with_given(self, given: impl AsRef) -> Self { diff --git a/crates/uv-pep508/src/verbatim_url.rs b/crates/uv-pep508/src/verbatim_url.rs index 12ce67105..965ef698f 100644 --- a/crates/uv-pep508/src/verbatim_url.rs +++ b/crates/uv-pep508/src/verbatim_url.rs @@ -9,12 +9,12 @@ use std::sync::LazyLock; use arcstr::ArcStr; use regex::Regex; use thiserror::Error; -use url::{ParseError, Url}; +use url::Url; use uv_cache_key::{CacheKey, CacheKeyHasher}; #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused_imports))] use uv_fs::{normalize_absolute_path, normalize_url_path}; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use crate::Pep508Url; @@ -57,8 +57,10 @@ impl VerbatimUrl { } /// Parse a URL from a string. - pub fn parse_url(given: impl AsRef) -> Result { - let url = DisplaySafeUrl::parse(given.as_ref())?; + pub fn parse_url(given: impl AsRef) -> Result { + let given = given.as_ref(); + let url = DisplaySafeUrl::parse(given)?; + Ok(Self { url, given: None }) } @@ -251,7 +253,7 @@ impl std::str::FromStr for VerbatimUrl { type Err = VerbatimUrlError; fn from_str(s: &str) -> Result { - Ok(Self::parse_url(s).map(|url| url.with_given(s))?) + Self::parse_url(s).map(|url| url.with_given(s)) } } @@ -271,7 +273,7 @@ impl Deref for VerbatimUrl { impl From for VerbatimUrl { fn from(url: Url) -> Self { - Self::from_url(DisplaySafeUrl::from(url)) + Self::from_url(DisplaySafeUrl::from_url(url)) } } @@ -390,7 +392,7 @@ impl Pep508Url for VerbatimUrl { pub enum VerbatimUrlError { /// Failed to parse a URL. #[error(transparent)] - Url(#[from] ParseError), + Url(#[from] DisplaySafeUrlError), /// Received a relative path, but no working directory was provided. #[error("relative path without a working directory: {0}")] @@ -645,6 +647,8 @@ impl std::fmt::Display for Scheme { #[cfg(test)] mod tests { + use insta::assert_snapshot; + use super::*; #[test] @@ -749,4 +753,27 @@ mod tests { let url = Url::parse("https://github.com/pypa/pip/archive/1.3.1.zip#sha1=da9234ee9982d4bbb3c72346a6de940a148ea686").unwrap(); assert!(!looks_like_git_repository(&url)); } + + #[test] + fn parse_url_ambiguous() { + assert_snapshot!( + VerbatimUrl::parse_url("https://user/name:password@domain/a/b/c").unwrap_err().to_string(), + @"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c" + ); + + assert_snapshot!( + VerbatimUrl::parse_url("https://user\\name:password@domain/a/b/c").unwrap_err().to_string(), + @"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c" + ); + + assert_snapshot!( + VerbatimUrl::parse_url("https://user#name:password@domain/a/b/c").unwrap_err().to_string(), + @"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c" + ); + + assert_snapshot!( + VerbatimUrl::parse_url("https://user.com/name:password@domain/a/b/c").unwrap_err().to_string(), + @"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c" + ); + } } diff --git a/crates/uv-publish/src/trusted_publishing.rs b/crates/uv-publish/src/trusted_publishing.rs index 4e6a9d79c..7632fe30b 100644 --- a/crates/uv-publish/src/trusted_publishing.rs +++ b/crates/uv-publish/src/trusted_publishing.rs @@ -10,13 +10,13 @@ use std::fmt::Display; use thiserror::Error; use tracing::{debug, trace}; use url::Url; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use uv_static::EnvVars; #[derive(Debug, Error)] pub enum TrustedPublishingError { #[error(transparent)] - Url(#[from] url::ParseError), + Url(#[from] DisplaySafeUrlError), #[error("Failed to obtain OIDC token: is the `id-token: write` permission missing?")] GitHubPermissions(#[source] ambient_id::Error), /// A hard failure during OIDC token discovery. diff --git a/crates/uv-pypi-types/src/direct_url.rs b/crates/uv-pypi-types/src/direct_url.rs index dcb7a4f83..2c77d1449 100644 --- a/crates/uv-pypi-types/src/direct_url.rs +++ b/crates/uv-pypi-types/src/direct_url.rs @@ -2,7 +2,7 @@ use std::collections::BTreeMap; use std::path::Path; use serde::{Deserialize, Serialize}; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; /// Metadata for a distribution that was installed via a direct URL. /// @@ -93,7 +93,7 @@ impl std::fmt::Display for VcsKind { } impl TryFrom<&DirectUrl> for DisplaySafeUrl { - type Error = url::ParseError; + type Error = DisplaySafeUrlError; fn try_from(value: &DirectUrl) -> Result { match value { diff --git a/crates/uv-pypi-types/src/parsed_url.rs b/crates/uv-pypi-types/src/parsed_url.rs index 3b3b21f17..e49792a96 100644 --- a/crates/uv-pypi-types/src/parsed_url.rs +++ b/crates/uv-pypi-types/src/parsed_url.rs @@ -2,7 +2,7 @@ use std::fmt::{Display, Formatter}; use std::path::{Path, PathBuf}; use thiserror::Error; -use url::{ParseError, Url}; +use url::Url; use uv_cache_key::{CacheKey, CacheKeyHasher}; use uv_distribution_filename::{DistExtension, ExtensionError}; @@ -10,7 +10,7 @@ use uv_git_types::{GitUrl, GitUrlParseError}; use uv_pep508::{ Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError, looks_like_git_repository, }; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use crate::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind}; @@ -27,7 +27,7 @@ pub enum ParsedUrlError { #[error(transparent)] GitUrlParse(#[from] GitUrlParseError), #[error("Not a valid URL: `{0}`")] - UrlParse(String, #[source] ParseError), + UrlParse(String, #[source] DisplaySafeUrlError), #[error(transparent)] VerbatimUrl(#[from] VerbatimUrlError), #[error( diff --git a/crates/uv-python/src/downloads.rs b/crates/uv-python/src/downloads.rs index 792abfd90..1a892982b 100644 --- a/crates/uv-python/src/downloads.rs +++ b/crates/uv-python/src/downloads.rs @@ -28,7 +28,7 @@ use uv_extract::hash::Hasher; use uv_fs::{Simplified, rename_with_retry}; use uv_platform::{self as platform, Arch, Libc, Os, Platform}; use uv_pypi_types::{HashAlgorithm, HashDigest}; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use uv_static::EnvVars; use crate::PythonVariant; @@ -75,7 +75,7 @@ pub enum Error { actual: String, }, #[error("Invalid download URL")] - InvalidUrl(#[from] url::ParseError), + InvalidUrl(#[from] DisplaySafeUrlError), #[error("Invalid download URL: {0}")] InvalidUrlFormat(DisplaySafeUrl), #[error("Invalid path in file URL: `{0}`")] diff --git a/crates/uv-redacted/Cargo.toml b/crates/uv-redacted/Cargo.toml index 52fca3f89..c32a82180 100644 --- a/crates/uv-redacted/Cargo.toml +++ b/crates/uv-redacted/Cargo.toml @@ -19,6 +19,7 @@ workspace = true ref-cast = { workspace = true } schemars = { workspace = true, optional = true } serde = { workspace = true } +thiserror = { workspace = true } url = { workspace = true } [features] diff --git a/crates/uv-redacted/src/lib.rs b/crates/uv-redacted/src/lib.rs index dac647ac1..c6cb2d5aa 100644 --- a/crates/uv-redacted/src/lib.rs +++ b/crates/uv-redacted/src/lib.rs @@ -4,8 +4,21 @@ use std::borrow::Cow; use std::fmt::{Debug, Display}; use std::ops::{Deref, DerefMut}; use std::str::FromStr; +use thiserror::Error; use url::Url; +#[derive(Error, Debug, Clone, PartialEq, Eq)] +pub enum DisplaySafeUrlError { + /// Failed to parse a URL. + #[error(transparent)] + Url(#[from] url::ParseError), + + /// We parsed a URL, but couldn't disambiguate its authority + /// component. + #[error("ambiguous user/pass authority in URL (not percent-encoded?): {0}")] + AmbiguousAuthority(String), +} + /// A [`Url`] wrapper that redacts credentials when displaying the URL. /// /// `DisplaySafeUrl` wraps the standard [`url::Url`] type, providing functionality to mask @@ -47,8 +60,55 @@ pub struct DisplaySafeUrl(Url); impl DisplaySafeUrl { #[inline] - pub fn parse(input: &str) -> Result { - Ok(Self(Url::parse(input)?)) + pub fn parse(input: &str) -> Result { + let url = Url::parse(input)?; + + // Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c` + // + // In this case the user *probably* meant to have a username of "user/name", but both RFC + // 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a + // non-percent-encoded slash or other special character. + // + // This ends up being moderately annoying to detect, since the above gets parsed into a + // "valid" WHATWG URL where the host is `used` and the pathname is + // `/name:password@domain/a/b/c` rather than causing a parse error. + // + // To detect it, we use a heuristic: if the password component is missing but the path or + // fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous. + if url.password().is_none() + && (url + .path() + .find(':') + .is_some_and(|pos| url.path()[pos..].contains('@')) + || url + .fragment() + .map(|fragment| { + fragment + .find(':') + .is_some_and(|pos| fragment[pos..].contains('@')) + }) + .unwrap_or(false)) + // If the above is true, we should always expect to find these in the given URL + && let Some(col_pos) = input.find(':') + && let Some(at_pos) = input.rfind('@') + { + // Our ambiguous URL probably has credentials in it, so we don't want to blast it out in + // the error message. We somewhat aggressively replace everything between the scheme's + // ':' and the lastmost `@` with `***`. + let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]); + return Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path)); + } + + Ok(Self(url)) + } + + /// Create a new [`DisplaySafeUrl`] from a [`Url`]. + /// + /// Unlike [`Self::parse`], this doesn't perform any ambiguity checks. + /// That means that it's primarily useful for contexts where a human can't easily accidentally + /// introduce an ambiguous URL, such as URLs being read from a request. + pub fn from_url(url: Url) -> Self { + Self(url) } /// Cast a `&Url` to a `&DisplaySafeUrl` using ref-cast. @@ -59,8 +119,8 @@ impl DisplaySafeUrl { /// Parse a string as an URL, with this URL as the base URL. #[inline] - pub fn join(&self, input: &str) -> Result { - self.0.join(input).map(Self) + pub fn join(&self, input: &str) -> Result { + Ok(Self(self.0.join(input)?)) } /// Serialize with Serde using the internal representation of the `Url` struct. @@ -78,12 +138,12 @@ impl DisplaySafeUrl { where D: serde::Deserializer<'de>, { - Url::deserialize_internal(deserializer).map(Self) + Ok(Self(Url::deserialize_internal(deserializer)?)) } #[allow(clippy::result_unit_err)] pub fn from_file_path>(path: P) -> Result { - Url::from_file_path(path).map(Self::from) + Ok(Self(Url::from_file_path(path)?)) } /// Remove the credentials from a URL, allowing the generic `git` username (without a password) @@ -175,12 +235,6 @@ impl Debug for DisplaySafeUrl { } } -impl From for DisplaySafeUrl { - fn from(url: Url) -> Self { - Self(url) - } -} - impl From for Url { fn from(url: DisplaySafeUrl) -> Self { url.0 @@ -188,10 +242,10 @@ impl From for Url { } impl FromStr for DisplaySafeUrl { - type Err = url::ParseError; + type Err = DisplaySafeUrlError; fn from_str(input: &str) -> Result { - Ok(Self(Url::from_str(input)?)) + Self::parse(input) } } @@ -250,8 +304,8 @@ mod tests { #[test] fn from_url_no_credentials() { let url_str = "https://pypi-proxy.fly.dev/basic-auth/simple"; - let url = DisplaySafeUrl::parse(url_str).unwrap(); - let log_safe_url = url; + let log_safe_url = + DisplaySafeUrl::parse("https://pypi-proxy.fly.dev/basic-auth/simple").unwrap(); assert_eq!(log_safe_url.username(), ""); assert!(log_safe_url.password().is_none()); assert_eq!(log_safe_url.to_string(), url_str); @@ -259,9 +313,9 @@ mod tests { #[test] fn from_url_username_and_password() { - let url_str = "https://user:pass@pypi-proxy.fly.dev/basic-auth/simple"; - let url = DisplaySafeUrl::parse(url_str).unwrap(); - let log_safe_url = url; + let log_safe_url = + DisplaySafeUrl::parse("https://user:pass@pypi-proxy.fly.dev/basic-auth/simple") + .unwrap(); assert_eq!(log_safe_url.username(), "user"); assert!(log_safe_url.password().is_some_and(|p| p == "pass")); assert_eq!( @@ -272,9 +326,8 @@ mod tests { #[test] fn from_url_just_password() { - let url_str = "https://:pass@pypi-proxy.fly.dev/basic-auth/simple"; - let url = DisplaySafeUrl::parse(url_str).unwrap(); - let log_safe_url = url; + let log_safe_url = + DisplaySafeUrl::parse("https://:pass@pypi-proxy.fly.dev/basic-auth/simple").unwrap(); assert_eq!(log_safe_url.username(), ""); assert!(log_safe_url.password().is_some_and(|p| p == "pass")); assert_eq!( @@ -285,9 +338,8 @@ mod tests { #[test] fn from_url_just_username() { - let url_str = "https://user@pypi-proxy.fly.dev/basic-auth/simple"; - let url = DisplaySafeUrl::parse(url_str).unwrap(); - let log_safe_url = url; + let log_safe_url = + DisplaySafeUrl::parse("https://user@pypi-proxy.fly.dev/basic-auth/simple").unwrap(); assert_eq!(log_safe_url.username(), "user"); assert!(log_safe_url.password().is_none()); assert_eq!( @@ -383,4 +435,22 @@ mod tests { "https://user:****@pypi-proxy.fly.dev/basic-auth/simple" ); } + + #[test] + fn parse_url_ambiguous() { + for url in &[ + "https://user/name:password@domain/a/b/c", + "https://user\\name:password@domain/a/b/c", + "https://user#name:password@domain/a/b/c", + "https://user.com/name:password@domain/a/b/c", + ] { + let err = DisplaySafeUrl::parse(url).unwrap_err(); + match err { + DisplaySafeUrlError::AmbiguousAuthority(redacted) => { + assert!(redacted.starts_with("https:***@domain/a/b/c")); + } + DisplaySafeUrlError::Url(_) => panic!("expected AmbiguousAuthority error"), + } + } + } } diff --git a/crates/uv-requirements-txt/src/lib.rs b/crates/uv-requirements-txt/src/lib.rs index e91e152ca..5cca0fba4 100644 --- a/crates/uv-requirements-txt/src/lib.rs +++ b/crates/uv-requirements-txt/src/lib.rs @@ -57,6 +57,7 @@ use uv_pep508::{Pep508Error, RequirementOrigin, VerbatimUrl, expand_env_vars}; use uv_pypi_types::VerbatimParsedUrl; #[cfg(feature = "http")] use uv_redacted::DisplaySafeUrl; +use uv_redacted::DisplaySafeUrlError; use crate::requirement::EditableError; pub use crate::requirement::RequirementsTxtRequirement; @@ -282,7 +283,7 @@ impl RequirementsTxt { requirements_txt.join( Url::parse(filename.as_ref()) .map_err(|err| RequirementsTxtParserError::Url { - source: err, + source: DisplaySafeUrlError::Url(err).into(), url: filename.to_string(), start, end, @@ -356,7 +357,7 @@ impl RequirementsTxt { requirements_txt.join( Url::parse(filename.as_ref()) .map_err(|err| RequirementsTxtParserError::Url { - source: err, + source: DisplaySafeUrlError::Url(err).into(), url: filename.to_string(), start, end, @@ -1041,7 +1042,7 @@ pub struct RequirementsTxtFileError { pub enum RequirementsTxtParserError { Io(io::Error), Url { - source: url::ParseError, + source: uv_pep508::VerbatimUrlError, url: String, start: usize, end: usize, @@ -1112,7 +1113,7 @@ pub enum RequirementsTxtParserError { #[cfg(feature = "http")] Reqwest(DisplaySafeUrl, reqwest_middleware::Error), #[cfg(feature = "http")] - InvalidUrl(String, url::ParseError), + InvalidUrl(String, DisplaySafeUrlError), } impl Display for RequirementsTxtParserError { @@ -1184,7 +1185,15 @@ impl Display for RequirementsTxtParserError { } #[cfg(feature = "http")] Self::InvalidUrl(url, err) => { - write!(f, "Not a valid URL, {err}: `{url}`") + match err { + DisplaySafeUrlError::Url(err) => write!(f, "Not a valid URL, {err}: `{url}`"), + DisplaySafeUrlError::AmbiguousAuthority(_) => { + // Intentionally avoid leaking the URL here, since we suspect that the user + // has given us an ambiguous URL that contains sensitive information. + // The error's own Display will provide a redacted version of the URL. + write!(f, "Invalid URL: {err}") + } + } } } } @@ -1343,9 +1352,15 @@ impl Display for RequirementsTxtFileError { write!(f, "Error while accessing remote requirements file: `{url}`") } #[cfg(feature = "http")] - RequirementsTxtParserError::InvalidUrl(url, err) => { - write!(f, "Not a valid URL, {err}: `{url}`") - } + RequirementsTxtParserError::InvalidUrl(url, err) => match err { + DisplaySafeUrlError::Url(err) => write!(f, "Not a valid URL, {err}: `{url}`"), + DisplaySafeUrlError::AmbiguousAuthority(_) => { + // Intentionally avoid leaking the URL here, since we suspect that the user + // has given us an ambiguous URL that contains sensitive information. + // The error's own Display will provide a redacted version of the URL. + write!(f, "Invalid URL: {err}") + } + }, } } } diff --git a/crates/uv-requirements/src/source_tree.rs b/crates/uv-requirements/src/source_tree.rs index d7e2dc674..47a85a1c8 100644 --- a/crates/uv-requirements/src/source_tree.rs +++ b/crates/uv-requirements/src/source_tree.rs @@ -178,7 +178,7 @@ impl<'a, Context: BuildContext> SourceTreeResolver<'a, Context> { } } - let Ok(url) = Url::from_directory_path(path).map(DisplaySafeUrl::from) else { + let Ok(url) = Url::from_directory_path(path).map(DisplaySafeUrl::from_url) else { return Err(anyhow::anyhow!("Failed to convert path to URL")); }; let source = SourceUrl::Directory(DirectorySourceUrl { diff --git a/crates/uv-resolver/src/lock/export/pylock_toml.rs b/crates/uv-resolver/src/lock/export/pylock_toml.rs index 2748a65d9..4a96d6fa8 100644 --- a/crates/uv-resolver/src/lock/export/pylock_toml.rs +++ b/crates/uv-resolver/src/lock/export/pylock_toml.rs @@ -1420,7 +1420,7 @@ impl PylockTomlVcs { let mut url = if let Some(url) = self.url.as_ref() { url.clone() } else if let Some(path) = self.path.as_ref() { - DisplaySafeUrl::from( + DisplaySafeUrl::from_url( Url::from_directory_path(install_path.join(path)) .map_err(|()| PylockTomlErrorKind::PathToUrl)?, ) diff --git a/crates/uv-resolver/src/lock/mod.rs b/crates/uv-resolver/src/lock/mod.rs index 361249179..689e7f4c4 100644 --- a/crates/uv-resolver/src/lock/mod.rs +++ b/crates/uv-resolver/src/lock/mod.rs @@ -44,7 +44,7 @@ use uv_pypi_types::{ ConflictKind, Conflicts, HashAlgorithm, HashDigest, HashDigests, Hashes, ParsedArchiveUrl, ParsedGitUrl, PyProjectToml, }; -use uv_redacted::DisplaySafeUrl; +use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError}; use uv_small_str::SmallString; use uv_types::{BuildContext, HashStrategy}; use uv_workspace::{Editability, WorkspaceMember}; @@ -5975,7 +5975,7 @@ enum SourceParseError { given: String, /// The URL parse error. #[source] - err: url::ParseError, + err: DisplaySafeUrlError, }, /// An error that occurs when a Git URL is missing a precise commit SHA. #[error("Missing SHA in source `{given}`")] diff --git a/crates/uv-resolver/src/redirect.rs b/crates/uv-resolver/src/redirect.rs index 7d2539aba..6d0696cd6 100644 --- a/crates/uv-resolver/src/redirect.rs +++ b/crates/uv-resolver/src/redirect.rs @@ -84,13 +84,13 @@ fn apply_redirect(url: &VerbatimUrl, redirect: DisplaySafeUrl) -> VerbatimUrl { #[cfg(test)] mod tests { - use uv_pep508::VerbatimUrl; + use uv_pep508::{VerbatimUrl, VerbatimUrlError}; use uv_redacted::DisplaySafeUrl; use crate::redirect::apply_redirect; #[test] - fn test_apply_redirect() -> Result<(), url::ParseError> { + fn test_apply_redirect() -> Result<(), VerbatimUrlError> { // If there's no `@` in the original representation, we can just append the precise suffix // to the given representation. let verbatim = VerbatimUrl::parse_url("https://github.com/flask.git")? diff --git a/crates/uv/tests/it/sync.rs b/crates/uv/tests/it/sync.rs index 7765b1b25..fad338270 100644 --- a/crates/uv/tests/it/sync.rs +++ b/crates/uv/tests/it/sync.rs @@ -14717,3 +14717,46 @@ fn sync_no_sources_editable_to_package_switch() -> Result<()> { Ok(()) } + +#[test] +fn sync_fails_ambiguous_url() -> Result<()> { + let context = TestContext::new("3.12"); + let pyproject_toml = context.temp_dir.child("pyproject.toml"); + pyproject_toml.write_str( + r#" + [project] + name = "project" + version = "0.1.0" + requires-python = ">=3.12" + dependencies = ["anyio==3.7.0"] + + [[tool.uv.index]] + name = "bug" + url = "https://user/name:password@domain/a/b/c" + default = true + "#, + )?; + + uv_snapshot!(context.filters(), context.sync(), @r#" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Failed to parse `pyproject.toml` during settings discovery: + TOML parse error at line 10, column 15 + | + 10 | url = "https://user/name:password@domain/a/b/c" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c + + error: Failed to parse: `pyproject.toml` + Caused by: TOML parse error at line 10, column 15 + | + 10 | url = "https://user/name:password@domain/a/b/c" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c + "#); + + Ok(()) +}