Reject ambiguously parsed URLs (#16622)

Co-authored-by: Zanie Blue <contact@zanie.dev>
This commit is contained in:
William Woodruff 2025-11-12 11:27:57 -05:00 committed by GitHub
parent 82c612704a
commit ae1edef9c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 257 additions and 97 deletions

2
Cargo.lock generated
View file

@ -5463,6 +5463,7 @@ dependencies = [
"uv-extract",
"uv-pep440",
"uv-platform",
"uv-redacted",
]
[[package]]
@ -6437,6 +6438,7 @@ dependencies = [
"ref-cast",
"schemars",
"serde",
"thiserror 2.0.17",
"url",
]

View file

@ -502,7 +502,7 @@ impl AuthMiddleware {
// Nothing to insert into the cache if we don't have credentials
return next.run(request, extensions).await;
};
let url = DisplaySafeUrl::from(request.url().clone());
let url = DisplaySafeUrl::from_url(request.url().clone());
if matches!(auth_policy, AuthPolicy::Always) && credentials.password().is_none() {
return Err(Error::Middleware(format_err!("Missing password for {url}")));
}
@ -801,7 +801,7 @@ impl AuthMiddleware {
}
fn tracing_url(request: &Request, credentials: Option<&Authentication>) -> DisplaySafeUrl {
let mut url = DisplaySafeUrl::from(request.url().clone());
let mut url = DisplaySafeUrl::from_url(request.url().clone());
if let Some(Authentication::Credentials(creds)) = credentials {
if let Some(username) = creds.username() {
let _ = url.set_username(username);
@ -1990,13 +1990,13 @@ mod tests {
let base_url_2 = base_url.join("prefix_2")?;
let indexes = Indexes::from_indexes(vec![
Index {
url: DisplaySafeUrl::from(base_url_1.clone()),
root_url: DisplaySafeUrl::from(base_url_1.clone()),
url: DisplaySafeUrl::from_url(base_url_1.clone()),
root_url: DisplaySafeUrl::from_url(base_url_1.clone()),
auth_policy: AuthPolicy::Auto,
},
Index {
url: DisplaySafeUrl::from(base_url_2.clone()),
root_url: DisplaySafeUrl::from(base_url_2.clone()),
url: DisplaySafeUrl::from_url(base_url_2.clone()),
root_url: DisplaySafeUrl::from_url(base_url_2.clone()),
auth_policy: AuthPolicy::Auto,
},
]);
@ -2098,8 +2098,8 @@ mod tests {
let base_url = Url::parse(&server.uri())?;
let index_url = base_url.join("prefix_1")?;
let indexes = Indexes::from_indexes(vec![Index {
url: DisplaySafeUrl::from(index_url.clone()),
root_url: DisplaySafeUrl::from(index_url.clone()),
url: DisplaySafeUrl::from_url(index_url.clone()),
root_url: DisplaySafeUrl::from_url(index_url.clone()),
auth_policy: AuthPolicy::Auto,
}]);
@ -2153,7 +2153,7 @@ mod tests {
}
fn indexes_for(url: &Url, policy: AuthPolicy) -> Indexes {
let mut url = DisplaySafeUrl::from(url.clone());
let mut url = DisplaySafeUrl::from_url(url.clone());
url.set_password(None).ok();
url.set_username("").ok();
Indexes::from_indexes(vec![Index {

View file

@ -10,7 +10,7 @@ use tracing::debug;
use url::Url;
use uv_cache_key::CanonicalUrl;
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use uv_small_str::SmallString;
use uv_state::{StateBucket, StateStore};
use uv_static::EnvVars;
@ -473,7 +473,7 @@ impl PyxTokenStore {
#[derive(thiserror::Error, Debug)]
pub enum TokenStoreError {
#[error(transparent)]
Url(#[from] url::ParseError),
Url(#[from] DisplaySafeUrlError),
#[error(transparent)]
Io(#[from] io::Error),
#[error(transparent)]

View file

@ -2,12 +2,12 @@ use serde::{Deserialize, Serialize};
use std::str::FromStr;
use thiserror::Error;
use url::Url;
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
#[derive(Error, Debug)]
pub enum ServiceParseError {
#[error(transparent)]
InvalidUrl(#[from] url::ParseError),
InvalidUrl(#[from] DisplaySafeUrlError),
#[error("Unsupported scheme: {0}")]
UnsupportedScheme(String),
#[error("HTTPS is required for non-local hosts")]
@ -51,7 +51,7 @@ impl FromStr for Service {
// First try parsing as-is
let url = match DisplaySafeUrl::parse(s) {
Ok(url) => url,
Err(url::ParseError::RelativeUrlWithoutBase) => {
Err(DisplaySafeUrlError::Url(url::ParseError::RelativeUrlWithoutBase)) => {
// If it's a relative URL, try prepending https://
let with_https = format!("https://{s}");
DisplaySafeUrl::parse(&with_https)?

View file

@ -23,6 +23,7 @@ uv-distribution-filename = { workspace = true }
uv-extract = { workspace = true }
uv-pep440 = { workspace = true }
uv-platform = { workspace = true }
uv-redacted = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
reqwest = { workspace = true }
@ -34,4 +35,3 @@ tokio = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }

View file

@ -24,6 +24,7 @@ use uv_client::{BaseClient, is_transient_network_error};
use uv_extract::{Error as ExtractError, stream};
use uv_pep440::Version;
use uv_platform::Platform;
use uv_redacted::DisplaySafeUrl;
/// Binary tools that can be installed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -311,7 +312,7 @@ async fn download_and_unpack(
let temp_dir = tempfile::tempdir_in(cache.bucket(CacheBucket::Binaries))?;
let response = client
.for_host(&download_url.clone().into())
.for_host(&DisplaySafeUrl::from_url(download_url.clone()))
.get(download_url.clone())
.send()
.await

View file

@ -4,7 +4,7 @@ use std::hash::{Hash, Hasher};
use std::ops::Deref;
use url::Url;
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use crate::cache_key::{CacheKey, CacheKeyHasher};
@ -98,7 +98,7 @@ impl CanonicalUrl {
Self(url)
}
pub fn parse(url: &str) -> Result<Self, url::ParseError> {
pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
Ok(Self::new(&DisplaySafeUrl::parse(url)?))
}
}
@ -164,7 +164,7 @@ impl RepositoryUrl {
Self(url)
}
pub fn parse(url: &str) -> Result<Self, url::ParseError> {
pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
Ok(Self::new(&DisplaySafeUrl::parse(url)?))
}
}
@ -204,7 +204,7 @@ mod tests {
use super::*;
#[test]
fn user_credential_does_not_affect_cache_key() -> Result<(), url::ParseError> {
fn user_credential_does_not_affect_cache_key() -> Result<(), DisplaySafeUrlError> {
let mut hasher = CacheKeyHasher::new();
CanonicalUrl::parse("https://example.com/pypa/sample-namespace-packages.git@2.0.0")?
.cache_key(&mut hasher);
@ -254,7 +254,7 @@ mod tests {
}
#[test]
fn canonical_url() -> Result<(), url::ParseError> {
fn canonical_url() -> Result<(), DisplaySafeUrlError> {
// Two URLs should be considered equal regardless of the `.git` suffix.
assert_eq!(
CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
@ -335,7 +335,7 @@ mod tests {
}
#[test]
fn repository_url() -> Result<(), url::ParseError> {
fn repository_url() -> Result<(), DisplaySafeUrlError> {
// Two URLs should be considered equal regardless of the `.git` suffix.
assert_eq!(
RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,

View file

@ -35,6 +35,7 @@ use uv_pep508::MarkerEnvironment;
use uv_platform_tags::Platform;
use uv_preview::Preview;
use uv_redacted::DisplaySafeUrl;
use uv_redacted::DisplaySafeUrlError;
use uv_static::EnvVars;
use uv_version::version;
use uv_warnings::warn_user_once;
@ -577,7 +578,7 @@ impl BaseClient {
/// Executes a request, applying redirect policy.
pub async fn execute(&self, req: Request) -> reqwest_middleware::Result<Response> {
let client = self.for_host(&DisplaySafeUrl::from(req.url().clone()));
let client = self.for_host(&DisplaySafeUrl::from_url(req.url().clone()));
client.execute(req).await
}
@ -707,7 +708,7 @@ fn request_into_redirect(
res: &Response,
cross_origin_credentials_policy: CrossOriginCredentialsPolicy,
) -> reqwest_middleware::Result<Option<Request>> {
let original_req_url = DisplaySafeUrl::from(req.url().clone());
let original_req_url = DisplaySafeUrl::from_url(req.url().clone());
let status = res.status();
let should_redirect = match status {
StatusCode::MOVED_PERMANENTLY
@ -760,7 +761,7 @@ fn request_into_redirect(
let mut redirect_url = match DisplaySafeUrl::parse(location) {
Ok(url) => url,
// Per RFC 7231, URLs should be resolved against the request URL.
Err(ParseError::RelativeUrlWithoutBase) => original_req_url.join(location).map_err(|err| {
Err(DisplaySafeUrlError::Url(ParseError::RelativeUrlWithoutBase)) => original_req_url.join(location).map_err(|err| {
reqwest_middleware::Error::Middleware(anyhow!(
"Invalid HTTP {status} 'Location' value `{location}` relative to `{original_req_url}`: {err}"
))

View file

@ -557,7 +557,7 @@ impl CachedClient {
cached: DataWithCachePolicy,
new_cache_policy_builder: CachePolicyBuilder,
) -> Result<CachedResponse, Error> {
let url = DisplaySafeUrl::from(req.url().clone());
let url = DisplaySafeUrl::from_url(req.url().clone());
debug!("Sending revalidation request for: {url}");
let mut response = self
.0
@ -627,7 +627,7 @@ impl CachedClient {
req: Request,
cache_control: CacheControl<'_>,
) -> Result<(Response, Option<Box<CachePolicy>>), Error> {
let url = DisplaySafeUrl::from(req.url().clone());
let url = DisplaySafeUrl::from_url(req.url().clone());
trace!("Sending fresh {} request for {}", req.method(), url);
let cache_policy_builder = CachePolicyBuilder::new(&req);
let mut response = self

View file

@ -189,7 +189,7 @@ impl<'a> FlatIndexClient<'a> {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = DisplaySafeUrl::from(response.url().clone());
let url = DisplaySafeUrl::from_url(response.url().clone());
let text = response
.text()

View file

@ -8,7 +8,7 @@ use uv_normalize::PackageName;
use uv_pep440::VersionSpecifiers;
use uv_pypi_types::{BaseUrl, CoreMetadata, Hashes, PypiFile, Yanked};
use uv_pypi_types::{HashError, LenientVersionSpecifiers};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
/// A parsed structure from PyPI "HTML" index format for a single package.
#[derive(Debug, Clone)]
@ -285,7 +285,7 @@ pub enum Error {
FromUtf8(#[from] std::string::FromUtf8Error),
#[error("Failed to parse URL: {0}")]
UrlParse(String, #[source] url::ParseError),
UrlParse(String, #[source] DisplaySafeUrlError),
#[error(transparent)]
HtmlParse(#[from] tl::ParseError),

View file

@ -43,7 +43,7 @@ impl Middleware for OfflineMiddleware {
) -> reqwest_middleware::Result<Response> {
Err(reqwest_middleware::Error::Middleware(
OfflineError {
url: DisplaySafeUrl::from(req.url().clone()),
url: DisplaySafeUrl::from_url(req.url().clone()),
}
.into(),
))

View file

@ -587,7 +587,7 @@ impl RegistryClient {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = DisplaySafeUrl::from(response.url().clone());
let url = DisplaySafeUrl::from_url(response.url().clone());
let content_type = response
.headers()
@ -766,7 +766,7 @@ impl RegistryClient {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = DisplaySafeUrl::from(response.url().clone());
let url = DisplaySafeUrl::from_url(response.url().clone());
let content_type = response
.headers()

View file

@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError};
use uv_pep508::split_scheme;
use uv_pypi_types::{CoreMetadata, HashDigests, Yanked};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use uv_small_str::SmallString;
/// Error converting [`uv_pypi_types::PypiFile`] to [`distribution_type::File`].
@ -272,7 +272,7 @@ pub enum ToUrlError {
base: String,
/// The underlying URL parse error.
#[source]
err: url::ParseError,
err: DisplaySafeUrlError,
},
/// An error that occurs when the base URL could not be joined with
/// the relative path in a [`FileLocation::Relative`].
@ -284,7 +284,7 @@ pub enum ToUrlError {
path: String,
/// The underlying URL parse error.
#[source]
err: url::ParseError,
err: DisplaySafeUrlError,
},
/// An error that occurs when the absolute URL in [`FileLocation::Absolute`]
/// could not be parsed as a valid URL.
@ -294,7 +294,7 @@ pub enum ToUrlError {
absolute: String,
/// The underlying URL parse error.
#[source]
err: url::ParseError,
err: DisplaySafeUrlError,
},
}

View file

@ -13,7 +13,7 @@ use uv_pep440::VersionSpecifiers;
use uv_pep508::{
MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, VersionOrUrl, marker,
};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use crate::{IndexMetadata, IndexUrl};
@ -29,7 +29,7 @@ pub enum RequirementError {
#[error(transparent)]
ParsedUrlError(#[from] ParsedUrlError),
#[error(transparent)]
UrlParseError(#[from] url::ParseError),
UrlParseError(#[from] DisplaySafeUrlError),
#[error(transparent)]
OidParseError(#[from] OidParseError),
#[error(transparent)]

View file

@ -14,7 +14,7 @@ use uv_normalize::{ExtraName, GroupName, PackageName};
use uv_pep440::VersionSpecifiers;
use uv_pep508::{MarkerTree, VerbatimUrl, VersionOrUrl, looks_like_git_repository};
use uv_pypi_types::{ConflictItem, ParsedGitUrl, ParsedUrlError, VerbatimParsedUrl};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use uv_workspace::Workspace;
use uv_workspace::pyproject::{PyProjectToml, Source, Sources};
@ -527,7 +527,7 @@ pub enum LoweringError {
#[error("Workspace members are not allowed in non-workspace contexts")]
WorkspaceMember,
#[error(transparent)]
InvalidUrl(#[from] url::ParseError),
InvalidUrl(#[from] DisplaySafeUrlError),
#[error(transparent)]
InvalidVerbatimUrl(#[from] uv_pep508::VerbatimUrlError),
#[error("Fragments are not allowed in URLs: `{0}`")]

View file

@ -48,7 +48,7 @@ impl UnnamedRequirementUrl for VerbatimUrl {
}
fn parse_unnamed_url(given: impl AsRef<str>) -> Result<Self, Self::Err> {
Ok(Self::parse_url(given)?)
Self::parse_url(given)
}
fn with_given(self, given: impl AsRef<str>) -> Self {

View file

@ -9,12 +9,12 @@ use std::sync::LazyLock;
use arcstr::ArcStr;
use regex::Regex;
use thiserror::Error;
use url::{ParseError, Url};
use url::Url;
use uv_cache_key::{CacheKey, CacheKeyHasher};
#[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused_imports))]
use uv_fs::{normalize_absolute_path, normalize_url_path};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use crate::Pep508Url;
@ -57,8 +57,10 @@ impl VerbatimUrl {
}
/// Parse a URL from a string.
pub fn parse_url(given: impl AsRef<str>) -> Result<Self, ParseError> {
let url = DisplaySafeUrl::parse(given.as_ref())?;
pub fn parse_url(given: impl AsRef<str>) -> Result<Self, VerbatimUrlError> {
let given = given.as_ref();
let url = DisplaySafeUrl::parse(given)?;
Ok(Self { url, given: None })
}
@ -251,7 +253,7 @@ impl std::str::FromStr for VerbatimUrl {
type Err = VerbatimUrlError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self::parse_url(s).map(|url| url.with_given(s))?)
Self::parse_url(s).map(|url| url.with_given(s))
}
}
@ -271,7 +273,7 @@ impl Deref for VerbatimUrl {
impl From<Url> for VerbatimUrl {
fn from(url: Url) -> Self {
Self::from_url(DisplaySafeUrl::from(url))
Self::from_url(DisplaySafeUrl::from_url(url))
}
}
@ -390,7 +392,7 @@ impl Pep508Url for VerbatimUrl {
pub enum VerbatimUrlError {
/// Failed to parse a URL.
#[error(transparent)]
Url(#[from] ParseError),
Url(#[from] DisplaySafeUrlError),
/// Received a relative path, but no working directory was provided.
#[error("relative path without a working directory: {0}")]
@ -645,6 +647,8 @@ impl std::fmt::Display for Scheme {
#[cfg(test)]
mod tests {
use insta::assert_snapshot;
use super::*;
#[test]
@ -749,4 +753,27 @@ mod tests {
let url = Url::parse("https://github.com/pypa/pip/archive/1.3.1.zip#sha1=da9234ee9982d4bbb3c72346a6de940a148ea686").unwrap();
assert!(!looks_like_git_repository(&url));
}
#[test]
fn parse_url_ambiguous() {
assert_snapshot!(
VerbatimUrl::parse_url("https://user/name:password@domain/a/b/c").unwrap_err().to_string(),
@"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c"
);
assert_snapshot!(
VerbatimUrl::parse_url("https://user\\name:password@domain/a/b/c").unwrap_err().to_string(),
@"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c"
);
assert_snapshot!(
VerbatimUrl::parse_url("https://user#name:password@domain/a/b/c").unwrap_err().to_string(),
@"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c"
);
assert_snapshot!(
VerbatimUrl::parse_url("https://user.com/name:password@domain/a/b/c").unwrap_err().to_string(),
@"ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c"
);
}
}

View file

@ -10,13 +10,13 @@ use std::fmt::Display;
use thiserror::Error;
use tracing::{debug, trace};
use url::Url;
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use uv_static::EnvVars;
#[derive(Debug, Error)]
pub enum TrustedPublishingError {
#[error(transparent)]
Url(#[from] url::ParseError),
Url(#[from] DisplaySafeUrlError),
#[error("Failed to obtain OIDC token: is the `id-token: write` permission missing?")]
GitHubPermissions(#[source] ambient_id::Error),
/// A hard failure during OIDC token discovery.

View file

@ -2,7 +2,7 @@ use std::collections::BTreeMap;
use std::path::Path;
use serde::{Deserialize, Serialize};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
/// Metadata for a distribution that was installed via a direct URL.
///
@ -93,7 +93,7 @@ impl std::fmt::Display for VcsKind {
}
impl TryFrom<&DirectUrl> for DisplaySafeUrl {
type Error = url::ParseError;
type Error = DisplaySafeUrlError;
fn try_from(value: &DirectUrl) -> Result<Self, Self::Error> {
match value {

View file

@ -2,7 +2,7 @@ use std::fmt::{Display, Formatter};
use std::path::{Path, PathBuf};
use thiserror::Error;
use url::{ParseError, Url};
use url::Url;
use uv_cache_key::{CacheKey, CacheKeyHasher};
use uv_distribution_filename::{DistExtension, ExtensionError};
@ -10,7 +10,7 @@ use uv_git_types::{GitUrl, GitUrlParseError};
use uv_pep508::{
Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError, looks_like_git_repository,
};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use crate::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind};
@ -27,7 +27,7 @@ pub enum ParsedUrlError {
#[error(transparent)]
GitUrlParse(#[from] GitUrlParseError),
#[error("Not a valid URL: `{0}`")]
UrlParse(String, #[source] ParseError),
UrlParse(String, #[source] DisplaySafeUrlError),
#[error(transparent)]
VerbatimUrl(#[from] VerbatimUrlError),
#[error(

View file

@ -28,7 +28,7 @@ use uv_extract::hash::Hasher;
use uv_fs::{Simplified, rename_with_retry};
use uv_platform::{self as platform, Arch, Libc, Os, Platform};
use uv_pypi_types::{HashAlgorithm, HashDigest};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use uv_static::EnvVars;
use crate::PythonVariant;
@ -75,7 +75,7 @@ pub enum Error {
actual: String,
},
#[error("Invalid download URL")]
InvalidUrl(#[from] url::ParseError),
InvalidUrl(#[from] DisplaySafeUrlError),
#[error("Invalid download URL: {0}")]
InvalidUrlFormat(DisplaySafeUrl),
#[error("Invalid path in file URL: `{0}`")]

View file

@ -19,6 +19,7 @@ workspace = true
ref-cast = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true }
[features]

View file

@ -4,8 +4,21 @@ use std::borrow::Cow;
use std::fmt::{Debug, Display};
use std::ops::{Deref, DerefMut};
use std::str::FromStr;
use thiserror::Error;
use url::Url;
#[derive(Error, Debug, Clone, PartialEq, Eq)]
pub enum DisplaySafeUrlError {
/// Failed to parse a URL.
#[error(transparent)]
Url(#[from] url::ParseError),
/// We parsed a URL, but couldn't disambiguate its authority
/// component.
#[error("ambiguous user/pass authority in URL (not percent-encoded?): {0}")]
AmbiguousAuthority(String),
}
/// A [`Url`] wrapper that redacts credentials when displaying the URL.
///
/// `DisplaySafeUrl` wraps the standard [`url::Url`] type, providing functionality to mask
@ -47,8 +60,55 @@ pub struct DisplaySafeUrl(Url);
impl DisplaySafeUrl {
#[inline]
pub fn parse(input: &str) -> Result<Self, url::ParseError> {
Ok(Self(Url::parse(input)?))
pub fn parse(input: &str) -> Result<Self, DisplaySafeUrlError> {
let url = Url::parse(input)?;
// Reject some ambiguous cases, e.g., `https://user/name:password@domain/a/b/c`
//
// In this case the user *probably* meant to have a username of "user/name", but both RFC
// 3986 and WHATWG URL expect the userinfo (RFC 3986) or authority (WHATWG) to not contain a
// non-percent-encoded slash or other special character.
//
// This ends up being moderately annoying to detect, since the above gets parsed into a
// "valid" WHATWG URL where the host is `used` and the pathname is
// `/name:password@domain/a/b/c` rather than causing a parse error.
//
// To detect it, we use a heuristic: if the password component is missing but the path or
// fragment contain a `:` followed by a `@`, then we assume the URL is ambiguous.
if url.password().is_none()
&& (url
.path()
.find(':')
.is_some_and(|pos| url.path()[pos..].contains('@'))
|| url
.fragment()
.map(|fragment| {
fragment
.find(':')
.is_some_and(|pos| fragment[pos..].contains('@'))
})
.unwrap_or(false))
// If the above is true, we should always expect to find these in the given URL
&& let Some(col_pos) = input.find(':')
&& let Some(at_pos) = input.rfind('@')
{
// Our ambiguous URL probably has credentials in it, so we don't want to blast it out in
// the error message. We somewhat aggressively replace everything between the scheme's
// ':' and the lastmost `@` with `***`.
let redacted_path = format!("{}***{}", &input[0..=col_pos], &input[at_pos..]);
return Err(DisplaySafeUrlError::AmbiguousAuthority(redacted_path));
}
Ok(Self(url))
}
/// Create a new [`DisplaySafeUrl`] from a [`Url`].
///
/// Unlike [`Self::parse`], this doesn't perform any ambiguity checks.
/// That means that it's primarily useful for contexts where a human can't easily accidentally
/// introduce an ambiguous URL, such as URLs being read from a request.
pub fn from_url(url: Url) -> Self {
Self(url)
}
/// Cast a `&Url` to a `&DisplaySafeUrl` using ref-cast.
@ -59,8 +119,8 @@ impl DisplaySafeUrl {
/// Parse a string as an URL, with this URL as the base URL.
#[inline]
pub fn join(&self, input: &str) -> Result<Self, url::ParseError> {
self.0.join(input).map(Self)
pub fn join(&self, input: &str) -> Result<Self, DisplaySafeUrlError> {
Ok(Self(self.0.join(input)?))
}
/// Serialize with Serde using the internal representation of the `Url` struct.
@ -78,12 +138,12 @@ impl DisplaySafeUrl {
where
D: serde::Deserializer<'de>,
{
Url::deserialize_internal(deserializer).map(Self)
Ok(Self(Url::deserialize_internal(deserializer)?))
}
#[allow(clippy::result_unit_err)]
pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self, ()> {
Url::from_file_path(path).map(Self::from)
Ok(Self(Url::from_file_path(path)?))
}
/// Remove the credentials from a URL, allowing the generic `git` username (without a password)
@ -175,12 +235,6 @@ impl Debug for DisplaySafeUrl {
}
}
impl From<Url> for DisplaySafeUrl {
fn from(url: Url) -> Self {
Self(url)
}
}
impl From<DisplaySafeUrl> for Url {
fn from(url: DisplaySafeUrl) -> Self {
url.0
@ -188,10 +242,10 @@ impl From<DisplaySafeUrl> for Url {
}
impl FromStr for DisplaySafeUrl {
type Err = url::ParseError;
type Err = DisplaySafeUrlError;
fn from_str(input: &str) -> Result<Self, Self::Err> {
Ok(Self(Url::from_str(input)?))
Self::parse(input)
}
}
@ -250,8 +304,8 @@ mod tests {
#[test]
fn from_url_no_credentials() {
let url_str = "https://pypi-proxy.fly.dev/basic-auth/simple";
let url = DisplaySafeUrl::parse(url_str).unwrap();
let log_safe_url = url;
let log_safe_url =
DisplaySafeUrl::parse("https://pypi-proxy.fly.dev/basic-auth/simple").unwrap();
assert_eq!(log_safe_url.username(), "");
assert!(log_safe_url.password().is_none());
assert_eq!(log_safe_url.to_string(), url_str);
@ -259,9 +313,9 @@ mod tests {
#[test]
fn from_url_username_and_password() {
let url_str = "https://user:pass@pypi-proxy.fly.dev/basic-auth/simple";
let url = DisplaySafeUrl::parse(url_str).unwrap();
let log_safe_url = url;
let log_safe_url =
DisplaySafeUrl::parse("https://user:pass@pypi-proxy.fly.dev/basic-auth/simple")
.unwrap();
assert_eq!(log_safe_url.username(), "user");
assert!(log_safe_url.password().is_some_and(|p| p == "pass"));
assert_eq!(
@ -272,9 +326,8 @@ mod tests {
#[test]
fn from_url_just_password() {
let url_str = "https://:pass@pypi-proxy.fly.dev/basic-auth/simple";
let url = DisplaySafeUrl::parse(url_str).unwrap();
let log_safe_url = url;
let log_safe_url =
DisplaySafeUrl::parse("https://:pass@pypi-proxy.fly.dev/basic-auth/simple").unwrap();
assert_eq!(log_safe_url.username(), "");
assert!(log_safe_url.password().is_some_and(|p| p == "pass"));
assert_eq!(
@ -285,9 +338,8 @@ mod tests {
#[test]
fn from_url_just_username() {
let url_str = "https://user@pypi-proxy.fly.dev/basic-auth/simple";
let url = DisplaySafeUrl::parse(url_str).unwrap();
let log_safe_url = url;
let log_safe_url =
DisplaySafeUrl::parse("https://user@pypi-proxy.fly.dev/basic-auth/simple").unwrap();
assert_eq!(log_safe_url.username(), "user");
assert!(log_safe_url.password().is_none());
assert_eq!(
@ -383,4 +435,22 @@ mod tests {
"https://user:****@pypi-proxy.fly.dev/basic-auth/simple"
);
}
#[test]
fn parse_url_ambiguous() {
for url in &[
"https://user/name:password@domain/a/b/c",
"https://user\\name:password@domain/a/b/c",
"https://user#name:password@domain/a/b/c",
"https://user.com/name:password@domain/a/b/c",
] {
let err = DisplaySafeUrl::parse(url).unwrap_err();
match err {
DisplaySafeUrlError::AmbiguousAuthority(redacted) => {
assert!(redacted.starts_with("https:***@domain/a/b/c"));
}
DisplaySafeUrlError::Url(_) => panic!("expected AmbiguousAuthority error"),
}
}
}
}

View file

@ -57,6 +57,7 @@ use uv_pep508::{Pep508Error, RequirementOrigin, VerbatimUrl, expand_env_vars};
use uv_pypi_types::VerbatimParsedUrl;
#[cfg(feature = "http")]
use uv_redacted::DisplaySafeUrl;
use uv_redacted::DisplaySafeUrlError;
use crate::requirement::EditableError;
pub use crate::requirement::RequirementsTxtRequirement;
@ -282,7 +283,7 @@ impl RequirementsTxt {
requirements_txt.join(
Url::parse(filename.as_ref())
.map_err(|err| RequirementsTxtParserError::Url {
source: err,
source: DisplaySafeUrlError::Url(err).into(),
url: filename.to_string(),
start,
end,
@ -356,7 +357,7 @@ impl RequirementsTxt {
requirements_txt.join(
Url::parse(filename.as_ref())
.map_err(|err| RequirementsTxtParserError::Url {
source: err,
source: DisplaySafeUrlError::Url(err).into(),
url: filename.to_string(),
start,
end,
@ -1041,7 +1042,7 @@ pub struct RequirementsTxtFileError {
pub enum RequirementsTxtParserError {
Io(io::Error),
Url {
source: url::ParseError,
source: uv_pep508::VerbatimUrlError,
url: String,
start: usize,
end: usize,
@ -1112,7 +1113,7 @@ pub enum RequirementsTxtParserError {
#[cfg(feature = "http")]
Reqwest(DisplaySafeUrl, reqwest_middleware::Error),
#[cfg(feature = "http")]
InvalidUrl(String, url::ParseError),
InvalidUrl(String, DisplaySafeUrlError),
}
impl Display for RequirementsTxtParserError {
@ -1184,7 +1185,15 @@ impl Display for RequirementsTxtParserError {
}
#[cfg(feature = "http")]
Self::InvalidUrl(url, err) => {
write!(f, "Not a valid URL, {err}: `{url}`")
match err {
DisplaySafeUrlError::Url(err) => write!(f, "Not a valid URL, {err}: `{url}`"),
DisplaySafeUrlError::AmbiguousAuthority(_) => {
// Intentionally avoid leaking the URL here, since we suspect that the user
// has given us an ambiguous URL that contains sensitive information.
// The error's own Display will provide a redacted version of the URL.
write!(f, "Invalid URL: {err}")
}
}
}
}
}
@ -1343,9 +1352,15 @@ impl Display for RequirementsTxtFileError {
write!(f, "Error while accessing remote requirements file: `{url}`")
}
#[cfg(feature = "http")]
RequirementsTxtParserError::InvalidUrl(url, err) => {
write!(f, "Not a valid URL, {err}: `{url}`")
}
RequirementsTxtParserError::InvalidUrl(url, err) => match err {
DisplaySafeUrlError::Url(err) => write!(f, "Not a valid URL, {err}: `{url}`"),
DisplaySafeUrlError::AmbiguousAuthority(_) => {
// Intentionally avoid leaking the URL here, since we suspect that the user
// has given us an ambiguous URL that contains sensitive information.
// The error's own Display will provide a redacted version of the URL.
write!(f, "Invalid URL: {err}")
}
},
}
}
}

View file

@ -178,7 +178,7 @@ impl<'a, Context: BuildContext> SourceTreeResolver<'a, Context> {
}
}
let Ok(url) = Url::from_directory_path(path).map(DisplaySafeUrl::from) else {
let Ok(url) = Url::from_directory_path(path).map(DisplaySafeUrl::from_url) else {
return Err(anyhow::anyhow!("Failed to convert path to URL"));
};
let source = SourceUrl::Directory(DirectorySourceUrl {

View file

@ -1420,7 +1420,7 @@ impl PylockTomlVcs {
let mut url = if let Some(url) = self.url.as_ref() {
url.clone()
} else if let Some(path) = self.path.as_ref() {
DisplaySafeUrl::from(
DisplaySafeUrl::from_url(
Url::from_directory_path(install_path.join(path))
.map_err(|()| PylockTomlErrorKind::PathToUrl)?,
)

View file

@ -44,7 +44,7 @@ use uv_pypi_types::{
ConflictKind, Conflicts, HashAlgorithm, HashDigest, HashDigests, Hashes, ParsedArchiveUrl,
ParsedGitUrl, PyProjectToml,
};
use uv_redacted::DisplaySafeUrl;
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use uv_small_str::SmallString;
use uv_types::{BuildContext, HashStrategy};
use uv_workspace::{Editability, WorkspaceMember};
@ -5975,7 +5975,7 @@ enum SourceParseError {
given: String,
/// The URL parse error.
#[source]
err: url::ParseError,
err: DisplaySafeUrlError,
},
/// An error that occurs when a Git URL is missing a precise commit SHA.
#[error("Missing SHA in source `{given}`")]

View file

@ -84,13 +84,13 @@ fn apply_redirect(url: &VerbatimUrl, redirect: DisplaySafeUrl) -> VerbatimUrl {
#[cfg(test)]
mod tests {
use uv_pep508::VerbatimUrl;
use uv_pep508::{VerbatimUrl, VerbatimUrlError};
use uv_redacted::DisplaySafeUrl;
use crate::redirect::apply_redirect;
#[test]
fn test_apply_redirect() -> Result<(), url::ParseError> {
fn test_apply_redirect() -> Result<(), VerbatimUrlError> {
// If there's no `@` in the original representation, we can just append the precise suffix
// to the given representation.
let verbatim = VerbatimUrl::parse_url("https://github.com/flask.git")?

View file

@ -14717,3 +14717,46 @@ fn sync_no_sources_editable_to_package_switch() -> Result<()> {
Ok(())
}
#[test]
fn sync_fails_ambiguous_url() -> Result<()> {
let context = TestContext::new("3.12");
let pyproject_toml = context.temp_dir.child("pyproject.toml");
pyproject_toml.write_str(
r#"
[project]
name = "project"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = ["anyio==3.7.0"]
[[tool.uv.index]]
name = "bug"
url = "https://user/name:password@domain/a/b/c"
default = true
"#,
)?;
uv_snapshot!(context.filters(), context.sync(), @r#"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
warning: Failed to parse `pyproject.toml` during settings discovery:
TOML parse error at line 10, column 15
|
10 | url = "https://user/name:password@domain/a/b/c"
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c
error: Failed to parse: `pyproject.toml`
Caused by: TOML parse error at line 10, column 15
|
10 | url = "https://user/name:password@domain/a/b/c"
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ambiguous user/pass authority in URL (not percent-encoded?): https:***@domain/a/b/c
"#);
Ok(())
}