Simplify relative URL handling

I was trying to figure out what the correct relative-and-absolute URL handling function was and realized there are two and they are redundant.
This commit is contained in:
konstin 2025-07-03 23:20:48 +02:00
parent ff98ee3273
commit 6e4e5aaf8d
7 changed files with 35 additions and 92 deletions

View file

@ -152,9 +152,6 @@ pub enum ErrorKind {
#[error(transparent)] #[error(transparent)]
InvalidUrl(#[from] uv_distribution_types::ToUrlError), InvalidUrl(#[from] uv_distribution_types::ToUrlError),
#[error(transparent)]
JoinRelativeUrl(#[from] uv_pypi_types::JoinRelativeError),
#[error(transparent)] #[error(transparent)]
Flat(#[from] FlatIndexError), Flat(#[from] FlatIndexError),

View file

@ -21,7 +21,7 @@ use uv_configuration::KeyringProviderType;
use uv_configuration::{IndexStrategy, TrustedHost}; use uv_configuration::{IndexStrategy, TrustedHost};
use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; use uv_distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use uv_distribution_types::{ use uv_distribution_types::{
BuiltDist, File, FileLocation, IndexCapabilities, IndexEntryFilename, IndexFormat, BuiltDist, File, IndexCapabilities, IndexEntryFilename, IndexFormat,
IndexLocations, IndexMetadataRef, IndexStatusCodeDecision, IndexStatusCodeStrategy, IndexUrl, IndexLocations, IndexMetadataRef, IndexStatusCodeDecision, IndexStatusCodeStrategy, IndexUrl,
IndexUrls, Name, VariantsJson, IndexUrls, Name, VariantsJson,
}; };
@ -683,30 +683,14 @@ impl RegistryClient {
let wheel = wheels.best_wheel(); let wheel = wheels.best_wheel();
let location = match &wheel.file.url { let url = wheel.file.url.to_url().map_err(ErrorKind::InvalidUrl)?;
FileLocation::RelativeUrl(base, url) => { let location = if url.scheme() == "file" {
let url = uv_pypi_types::base_url_join_relative(base, url) let path = url
.map_err(ErrorKind::JoinRelativeUrl)?; .to_file_path()
if url.scheme() == "file" { .map_err(|()| ErrorKind::NonFileUrl(url.clone()))?;
let path = url WheelLocation::Path(path)
.to_file_path() } else {
.map_err(|()| ErrorKind::NonFileUrl(url.clone()))?; WheelLocation::Url(url)
WheelLocation::Path(path)
} else {
WheelLocation::Url(url)
}
}
FileLocation::AbsoluteUrl(url) => {
let url = url.to_url().map_err(ErrorKind::InvalidUrl)?;
if url.scheme() == "file" {
let path = url
.to_file_path()
.map_err(|()| ErrorKind::NonFileUrl(url.clone()))?;
WheelLocation::Path(path)
} else {
WheelLocation::Url(url)
}
}
}; };
match location { match location {
@ -1271,17 +1255,18 @@ mod tests {
use url::Url; use url::Url;
use uv_normalize::PackageName; use uv_normalize::PackageName;
use uv_pypi_types::{JoinRelativeError, SimpleJson}; use uv_pypi_types::SimpleJson;
use uv_redacted::DisplaySafeUrl; use uv_redacted::DisplaySafeUrl;
use crate::{SimpleMetadata, SimpleMetadatum, html::SimpleHtml}; use crate::{SimpleMetadata, SimpleMetadatum, html::SimpleHtml};
use crate::RegistryClientBuilder;
use uv_cache::Cache; use uv_cache::Cache;
use uv_distribution_types::{FileLocation, ToUrlError};
use uv_small_str::SmallString;
use wiremock::matchers::{basic_auth, method, path_regex}; use wiremock::matchers::{basic_auth, method, path_regex};
use wiremock::{Mock, MockServer, ResponseTemplate}; use wiremock::{Mock, MockServer, ResponseTemplate};
use crate::RegistryClientBuilder;
type Error = Box<dyn std::error::Error>; type Error = Box<dyn std::error::Error>;
async fn start_test_server(username: &'static str, password: &'static str) -> MockServer { async fn start_test_server(username: &'static str, password: &'static str) -> MockServer {
@ -1545,7 +1530,7 @@ mod tests {
/// ///
/// See: <https://github.com/astral-sh/uv/issues/1388> /// See: <https://github.com/astral-sh/uv/issues/1388>
#[test] #[test]
fn relative_urls_code_artifact() -> Result<(), JoinRelativeError> { fn relative_urls_code_artifact() -> Result<(), ToUrlError> {
let text = r#" let text = r#"
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
@ -1568,12 +1553,13 @@ mod tests {
let base = DisplaySafeUrl::parse("https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/flask") let base = DisplaySafeUrl::parse("https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/flask")
.unwrap(); .unwrap();
let SimpleHtml { base, files } = SimpleHtml::parse(text, &base).unwrap(); let SimpleHtml { base, files } = SimpleHtml::parse(text, &base).unwrap();
let base = SmallString::from(base.as_str());
// Test parsing of the file urls // Test parsing of the file urls
let urls = files let urls = files
.iter() .into_iter()
.map(|file| uv_pypi_types::base_url_join_relative(base.as_url().as_str(), &file.url)) .map(|file| FileLocation::new(file.url, &base).to_url())
.collect::<Result<Vec<_>, JoinRelativeError>>()?; .collect::<Result<Vec<_>, _>>()?;
let urls = urls let urls = urls
.iter() .iter()
.map(DisplaySafeUrl::to_string) .map(DisplaySafeUrl::to_string)

View file

@ -57,10 +57,7 @@ impl File {
.map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?, .map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
size: file.size, size: file.size,
upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond), upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
url: match split_scheme(&file.url) { url: FileLocation::new(file.url, base),
Some(..) => FileLocation::AbsoluteUrl(UrlString::new(file.url)),
None => FileLocation::RelativeUrl(base.clone(), file.url),
},
yanked: file.yanked, yanked: file.yanked,
}) })
} }
@ -77,6 +74,17 @@ pub enum FileLocation {
} }
impl FileLocation { impl FileLocation {
/// Parse a relative or absolute URL on a page with a base URL.
///
/// This follows the HTML semantics where a link on a page is resolved relative to the URL of
/// that page.
pub fn new(url: SmallString, base: &SmallString) -> Self {
match split_scheme(&url) {
Some(..) => FileLocation::AbsoluteUrl(UrlString::new(url)),
None => FileLocation::RelativeUrl(base.clone(), url),
}
}
/// Convert this location to a URL. /// Convert this location to a URL.
/// ///
/// A relative URL has its base joined to the path. An absolute URL is /// A relative URL has its base joined to the path. An absolute URL is

View file

@ -20,7 +20,7 @@ use uv_client::{
}; };
use uv_distribution_filename::WheelFilename; use uv_distribution_filename::WheelFilename;
use uv_distribution_types::{ use uv_distribution_types::{
BuildableSource, BuiltDist, Dist, FileLocation, HashPolicy, Hashed, InstalledDist, Name, BuildableSource, BuiltDist, Dist, HashPolicy, Hashed, InstalledDist, Name,
SourceDist, SourceDist,
}; };
use uv_extract::hash::Hasher; use uv_extract::hash::Hasher;
@ -179,12 +179,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
match dist { match dist {
BuiltDist::Registry(wheels) => { BuiltDist::Registry(wheels) => {
let wheel = wheels.best_wheel(); let wheel = wheels.best_wheel();
let url = match &wheel.file.url { let url = wheel.file.url.to_url()?;
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};
// Create a cache entry for the wheel. // Create a cache entry for the wheel.
let wheel_entry = self.build_context.cache().entry( let wheel_entry = self.build_context.cache().entry(

View file

@ -25,8 +25,6 @@ pub enum Error {
RelativePath(PathBuf), RelativePath(PathBuf),
#[error(transparent)] #[error(transparent)]
InvalidUrl(#[from] uv_distribution_types::ToUrlError), InvalidUrl(#[from] uv_distribution_types::ToUrlError),
#[error(transparent)]
JoinRelativeUrl(#[from] uv_pypi_types::JoinRelativeError),
#[error("Expected a file URL, but received: {0}")] #[error("Expected a file URL, but received: {0}")]
NonFileUrl(DisplaySafeUrl), NonFileUrl(DisplaySafeUrl),
#[error(transparent)] #[error(transparent)]

View file

@ -32,7 +32,7 @@ use uv_client::{
use uv_configuration::{BuildKind, BuildOutput, SourceStrategy}; use uv_configuration::{BuildKind, BuildOutput, SourceStrategy};
use uv_distribution_filename::{SourceDistExtension, WheelFilename}; use uv_distribution_filename::{SourceDistExtension, WheelFilename};
use uv_distribution_types::{ use uv_distribution_types::{
BuildableSource, DirectorySourceUrl, FileLocation, GitSourceUrl, HashPolicy, Hashed, BuildableSource, DirectorySourceUrl, GitSourceUrl, HashPolicy, Hashed,
PathSourceUrl, SourceDist, SourceUrl, PathSourceUrl, SourceDist, SourceUrl,
}; };
use uv_extract::hash::Hasher; use uv_extract::hash::Hasher;
@ -122,12 +122,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.join(dist.version.to_string()), .join(dist.version.to_string()),
); );
let url = match &dist.file.url { let url = dist.file.url.to_url()?;
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};
// If the URL is a file URL, use the local path directly. // If the URL is a file URL, use the local path directly.
if url.scheme() == "file" { if url.scheme() == "file" {
@ -271,12 +266,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.join(dist.version.to_string()), .join(dist.version.to_string()),
); );
let url = match &dist.file.url { let url = dist.file.url.to_url()?;
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};
// If the URL is a file URL, use the local path directly. // If the URL is a file URL, use the local path directly.
if url.scheme() == "file" { if url.scheme() == "file" {

View file

@ -1,37 +1,6 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use uv_redacted::DisplaySafeUrl; use uv_redacted::DisplaySafeUrl;
/// Join a relative URL to a base URL.
pub fn base_url_join_relative(
base: &str,
relative: &str,
) -> Result<DisplaySafeUrl, JoinRelativeError> {
let base_url = DisplaySafeUrl::parse(base).map_err(|err| JoinRelativeError::ParseError {
original: base.to_string(),
source: err,
})?;
base_url
.join(relative)
.map_err(|err| JoinRelativeError::ParseError {
original: format!("{base}/{relative}"),
source: err,
})
}
/// An error that occurs when `base_url_join_relative` fails.
///
/// The error message includes the URL (`base` or `maybe_relative`) passed to
/// `base_url_join_relative` that provoked the error.
#[derive(Clone, Debug, thiserror::Error)]
pub enum JoinRelativeError {
#[error("Failed to parse URL: `{original}`")]
ParseError {
original: String,
source: url::ParseError,
},
}
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)] #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct BaseUrl( pub struct BaseUrl(
#[serde( #[serde(