Parse URLs lazily in resolver (#10259)
Some checks are pending
CI / Determine changes (push) Waiting to run
CI / lint (push) Waiting to run
CI / cargo clippy | ubuntu (push) Blocked by required conditions
CI / cargo clippy | windows (push) Blocked by required conditions
CI / cargo dev generate-all (push) Blocked by required conditions
CI / cargo shear (push) Waiting to run
CI / cargo test | ubuntu (push) Blocked by required conditions
CI / cargo test | macos (push) Blocked by required conditions
CI / cargo test | windows (push) Blocked by required conditions
CI / check windows trampoline | aarch64 (push) Blocked by required conditions
CI / check windows trampoline | i686 (push) Blocked by required conditions
CI / check windows trampoline | x86_64 (push) Blocked by required conditions
CI / test windows trampoline | i686 (push) Blocked by required conditions
CI / test windows trampoline | x86_64 (push) Blocked by required conditions
CI / typos (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / build binary | linux (push) Blocked by required conditions
CI / build binary | macos aarch64 (push) Blocked by required conditions
CI / build binary | macos x86_64 (push) Blocked by required conditions
CI / build binary | windows (push) Blocked by required conditions
CI / cargo build (msrv) (push) Blocked by required conditions
CI / build binary | freebsd (push) Blocked by required conditions
CI / ecosystem test | prefecthq/prefect (push) Blocked by required conditions
CI / ecosystem test | pallets/flask (push) Blocked by required conditions
CI / integration test | conda on ubuntu (push) Blocked by required conditions
CI / integration test | free-threaded on linux (push) Blocked by required conditions
CI / integration test | free-threaded on windows (push) Blocked by required conditions
CI / integration test | pypy on ubuntu (push) Blocked by required conditions
CI / integration test | pypy on windows (push) Blocked by required conditions
CI / integration test | graalpy on ubuntu (push) Blocked by required conditions
CI / integration test | graalpy on windows (push) Blocked by required conditions
CI / integration test | github actions (push) Blocked by required conditions
CI / integration test | determine publish changes (push) Blocked by required conditions
CI / integration test | uv publish (push) Blocked by required conditions
CI / check cache | ubuntu (push) Blocked by required conditions
CI / check cache | macos aarch64 (push) Blocked by required conditions
CI / check system | python on debian (push) Blocked by required conditions
CI / check system | python on fedora (push) Blocked by required conditions
CI / check system | python on ubuntu (push) Blocked by required conditions
CI / check system | python on opensuse (push) Blocked by required conditions
CI / check system | python on rocky linux 8 (push) Blocked by required conditions
CI / check system | python on rocky linux 9 (push) Blocked by required conditions
CI / check system | pypy on ubuntu (push) Blocked by required conditions
CI / check system | pyston (push) Blocked by required conditions
CI / check system | alpine (push) Blocked by required conditions
CI / check system | python on macos aarch64 (push) Blocked by required conditions
CI / check system | homebrew python on macos aarch64 (push) Blocked by required conditions
CI / check system | python on macos x86_64 (push) Blocked by required conditions
CI / check system | python3.10 on windows (push) Blocked by required conditions
CI / check system | python3.10 on windows x86 (push) Blocked by required conditions
CI / check system | python3.13 on windows (push) Blocked by required conditions
CI / check system | python3.12 via chocolatey (push) Blocked by required conditions
CI / check system | python3.9 via pyenv (push) Blocked by required conditions
CI / check system | python3.13 (push) Blocked by required conditions
CI / check system | conda3.11 on linux (push) Blocked by required conditions
CI / check system | conda3.8 on linux (push) Blocked by required conditions
CI / check system | conda3.11 on macos (push) Blocked by required conditions
CI / check system | conda3.8 on macos (push) Blocked by required conditions
CI / check system | conda3.11 on windows (push) Blocked by required conditions
CI / check system | conda3.8 on windows (push) Blocked by required conditions
CI / check system | amazonlinux (push) Blocked by required conditions
CI / check system | embedded python3.10 on windows (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions

## Summary

The idea here is to avoid parsing all registry URLs upfront, and instead
parse them when we need them.

Closes #6133.
This commit is contained in:
Charlie Marsh 2025-01-01 12:35:30 -05:00 committed by GitHub
parent a65aebb4f1
commit 7bbec6b2e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 70 additions and 89 deletions

View file

@ -139,7 +139,7 @@ impl From<ErrorKind> for Error {
#[derive(Debug, thiserror::Error)]
pub enum ErrorKind {
#[error(transparent)]
UrlParse(#[from] url::ParseError),
InvalidUrl(#[from] uv_distribution_types::ToUrlError),
#[error(transparent)]
JoinRelativeUrl(#[from] uv_pypi_types::JoinRelativeError),

View file

@ -474,7 +474,7 @@ impl RegistryClient {
}
}
FileLocation::AbsoluteUrl(url) => {
let url = url.to_url();
let url = url.to_url().map_err(ErrorKind::InvalidUrl)?;
if url.scheme() == "file" {
let path = url
.to_file_path()

View file

@ -1,6 +1,4 @@
use std::borrow::Cow;
use std::fmt::{self, Display, Formatter};
use std::path::PathBuf;
use std::str::FromStr;
use jiff::Timestamp;
@ -8,7 +6,7 @@ use serde::{Deserialize, Serialize};
use url::Url;
use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError};
use uv_pep508::VerbatimUrl;
use uv_pep508::split_scheme;
use uv_pypi_types::{CoreMetadata, HashDigest, Yanked};
/// Error converting [`uv_pypi_types::File`] to [`distribution_type::File`].
@ -56,9 +54,9 @@ impl File {
.map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
size: file.size,
upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
url: match Url::parse(&file.url) {
Ok(url) => FileLocation::AbsoluteUrl(url.into()),
Err(_) => FileLocation::RelativeUrl(base.to_string(), file.url),
url: match split_scheme(&file.url) {
Some(..) => FileLocation::AbsoluteUrl(UrlString::new(file.url)),
None => FileLocation::RelativeUrl(base.to_string(), file.url),
},
yanked: file.yanked,
})
@ -101,7 +99,7 @@ impl FileLocation {
})?;
Ok(joined)
}
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.to_url()),
FileLocation::AbsoluteUrl(ref absolute) => absolute.to_url(),
}
}
@ -128,7 +126,7 @@ impl Display for FileLocation {
/// A [`Url`] represented as a `String`.
///
/// This type is guaranteed to be a valid URL but avoids being parsed into the [`Url`] type.
/// This type is not guaranteed to be a valid URL, and may error on conversion.
#[derive(
Debug,
Clone,
@ -148,10 +146,17 @@ impl Display for FileLocation {
pub struct UrlString(String);
impl UrlString {
/// Create a new [`UrlString`] from a [`String`].
pub fn new(url: String) -> Self {
Self(url)
}
/// Converts a [`UrlString`] to a [`Url`].
pub fn to_url(&self) -> Url {
// This conversion can never fail as the only way to construct a `UrlString` is from a `Url`.
Url::from_str(&self.0).unwrap()
pub fn to_url(&self) -> Result<Url, ToUrlError> {
Url::from_str(&self.0).map_err(|err| ToUrlError::InvalidAbsolute {
absolute: self.0.clone(),
err,
})
}
/// Return the [`UrlString`] with any query parameters and fragments removed.
@ -194,42 +199,18 @@ impl From<&Url> for UrlString {
}
}
impl From<Cow<'_, Url>> for UrlString {
fn from(value: Cow<'_, Url>) -> Self {
UrlString(value.to_string())
}
}
impl From<VerbatimUrl> for UrlString {
fn from(value: VerbatimUrl) -> Self {
UrlString(value.raw().to_string())
}
}
impl From<&VerbatimUrl> for UrlString {
fn from(value: &VerbatimUrl) -> Self {
UrlString(value.raw().to_string())
}
}
impl From<UrlString> for String {
fn from(value: UrlString) -> Self {
value.0
}
}
impl Display for UrlString {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}
/// An error that occurs when a `FileLocation` is not a valid URL.
/// An error that occurs when a [`FileLocation`] is not a valid URL.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum ToUrlError {
/// An error that occurs when the base URL in `FileLocation::Relative`
/// An error that occurs when the base URL in [`FileLocation::Relative`]
/// could not be parsed as a valid URL.
#[error("could not parse base URL `{base}` as a valid URL")]
#[error("Could not parse base URL `{base}` as a valid URL")]
InvalidBase {
/// The base URL that could not be parsed as a valid URL.
base: String,
@ -238,8 +219,8 @@ pub enum ToUrlError {
err: url::ParseError,
},
/// An error that occurs when the base URL could not be joined with
/// the relative path in a `FileLocation::Relative`.
#[error("could not join base URL `{base}` to relative path `{path}`")]
/// the relative path in a [`FileLocation::Relative`].
#[error("Could not join base URL `{base}` to relative path `{path}`")]
InvalidJoin {
/// The base URL that could not be parsed as a valid URL.
base: String,
@ -249,9 +230,9 @@ pub enum ToUrlError {
#[source]
err: url::ParseError,
},
/// An error that occurs when the absolute URL in `FileLocation::Absolute`
/// An error that occurs when the absolute URL in [`FileLocation::Absolute`]
/// could not be parsed as a valid URL.
#[error("could not parse absolute URL `{absolute}` as a valid URL")]
#[error("Could not parse absolute URL `{absolute}` as a valid URL")]
InvalidAbsolute {
/// The absolute URL that could not be parsed as a valid URL.
absolute: String,
@ -259,21 +240,6 @@ pub enum ToUrlError {
#[source]
err: url::ParseError,
},
/// An error that occurs when the file path in `FileLocation::Path` is
/// not valid UTF-8. We need paths to be valid UTF-8 to be transformed
/// into URLs, which must also be UTF-8.
#[error("could not build URL from file path `{path}` because it is not valid UTF-8")]
PathNotUtf8 {
/// The original path that was not valid UTF-8.
path: PathBuf,
},
/// An error that occurs when the file URL created from a file path is not
/// a valid URL.
#[error("could not parse file path `{path}` as a valid URL")]
InvalidPath {
/// The file path URL that could not be parsed as a valid URL.
path: String,
},
}
#[cfg(test)]

View file

@ -187,7 +187,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url(),
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};
// Create a cache entry for the wheel.

View file

@ -21,11 +21,11 @@ pub enum Error {
NoBuild,
// Network error
#[error("Failed to parse URL: {0}")]
Url(String, #[source] url::ParseError),
#[error("Expected an absolute path, but received: {}", _0.user_display())]
RelativePath(PathBuf),
#[error(transparent)]
InvalidUrl(#[from] uv_distribution_types::ToUrlError),
#[error(transparent)]
ParsedUrl(#[from] ParsedUrlError),
#[error(transparent)]
JoinRelativeUrl(#[from] uv_pypi_types::JoinRelativeError),

View file

@ -114,7 +114,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url(),
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};
// If the URL is a file URL, use the local path directly.
@ -263,7 +263,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url(),
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};
// If the URL is a file URL, use the local path directly.

View file

@ -81,7 +81,7 @@ pub fn read_lock_requirements(
preferences.push(Preference::from_lock(package, install_path)?);
// Map each entry in the lockfile to a Git SHA.
if let Some(git_ref) = package.as_git_ref() {
if let Some(git_ref) = package.as_git_ref()? {
git.push(git_ref);
}
}

View file

@ -1098,7 +1098,7 @@ impl Lock {
.into_iter()
.filter_map(|index| match index.url() {
IndexUrl::Pypi(_) | IndexUrl::Url(_) => {
Some(UrlString::from(index.url().redacted()))
Some(UrlString::from(index.url().redacted().as_ref()))
}
IndexUrl::Path(_) => None,
})
@ -1814,7 +1814,7 @@ impl Package {
let filename: WheelFilename =
self.wheels[best_wheel_index].filename.clone();
let url = Url::from(ParsedArchiveUrl {
url: url.to_url(),
url: url.to_url().map_err(LockErrorKind::InvalidUrl)?,
subdirectory: direct.subdirectory.clone(),
ext: DistExtension::Wheel,
});
@ -1946,7 +1946,7 @@ impl Package {
Source::Git(url, git) => {
// Remove the fragment and query from the URL; they're already present in the
// `GitSource`.
let mut url = url.to_url();
let mut url = url.to_url().map_err(LockErrorKind::InvalidUrl)?;
url.set_fragment(None);
url.set_query(None);
@ -1976,7 +1976,7 @@ impl Package {
let DistExtension::Source(ext) = DistExtension::from_path(url.as_ref())? else {
return Ok(None);
};
let location = url.to_url();
let location = url.to_url().map_err(LockErrorKind::InvalidUrl)?;
let subdirectory = direct.subdirectory.as_ref().map(PathBuf::from);
let url = Url::from(ParsedArchiveUrl {
url: location.clone(),
@ -2020,7 +2020,10 @@ impl Package {
url: FileLocation::AbsoluteUrl(file_url.clone()),
yanked: None,
});
let index = IndexUrl::from(VerbatimUrl::from_url(url.to_url()));
let index = IndexUrl::from(VerbatimUrl::from_url(
url.to_url().map_err(LockErrorKind::InvalidUrl)?,
));
let reg_dist = RegistrySourceDist {
name: self.id.name.clone(),
@ -2262,7 +2265,9 @@ impl Package {
pub fn index(&self, root: &Path) -> Result<Option<IndexUrl>, LockError> {
match &self.id.source {
Source::Registry(RegistrySource::Url(url)) => {
let index = IndexUrl::from(VerbatimUrl::from_url(url.to_url()));
let index = IndexUrl::from(VerbatimUrl::from_url(
url.to_url().map_err(LockErrorKind::InvalidUrl)?,
));
Ok(Some(index))
}
Source::Registry(RegistrySource::Path(path)) => {
@ -2291,16 +2296,16 @@ impl Package {
}
/// Returns the [`ResolvedRepositoryReference`] for the package, if it is a Git source.
pub fn as_git_ref(&self) -> Option<ResolvedRepositoryReference> {
pub fn as_git_ref(&self) -> Result<Option<ResolvedRepositoryReference>, LockError> {
match &self.id.source {
Source::Git(url, git) => Some(ResolvedRepositoryReference {
Source::Git(url, git) => Ok(Some(ResolvedRepositoryReference {
reference: RepositoryReference {
url: RepositoryUrl::new(&url.to_url()),
url: RepositoryUrl::new(&url.to_url().map_err(LockErrorKind::InvalidUrl)?),
reference: GitReference::from(git.kind.clone()),
},
sha: git.precise,
}),
_ => None,
})),
_ => Ok(None),
}
}
}
@ -3138,7 +3143,7 @@ impl SourceDist {
match &reg_dist.index {
IndexUrl::Pypi(_) | IndexUrl::Url(_) => {
let url = normalize_file_location(&reg_dist.file.url)
.map_err(LockErrorKind::InvalidFileUrl)
.map_err(LockErrorKind::InvalidUrl)
.map_err(LockError::from)?;
let hash = reg_dist.file.hashes.iter().max().cloned().map(Hash::from);
let size = reg_dist.file.size;
@ -3153,7 +3158,7 @@ impl SourceDist {
.file
.url
.to_url()
.map_err(LockErrorKind::InvalidFileUrl)?
.map_err(LockErrorKind::InvalidUrl)?
.to_file_path()
.map_err(|()| LockErrorKind::UrlToPath)?;
let path = relative_to(&reg_dist_path, index_path)
@ -3444,7 +3449,7 @@ impl Wheel {
match &wheel.index {
IndexUrl::Pypi(_) | IndexUrl::Url(_) => {
let url = normalize_file_location(&wheel.file.url)
.map_err(LockErrorKind::InvalidFileUrl)
.map_err(LockErrorKind::InvalidUrl)
.map_err(LockError::from)?;
let hash = wheel.file.hashes.iter().max().cloned().map(Hash::from);
let size = wheel.file.size;
@ -3461,7 +3466,7 @@ impl Wheel {
.file
.url
.to_url()
.map_err(LockErrorKind::InvalidFileUrl)?
.map_err(LockErrorKind::InvalidUrl)?
.to_file_path()
.map_err(|()| LockErrorKind::UrlToPath)?;
let path = relative_to(&wheel_path, index_path)
@ -3507,7 +3512,7 @@ impl Wheel {
let filename: WheelFilename = self.filename.clone();
match source {
RegistrySource::Url(index_url) => {
RegistrySource::Url(url) => {
let file_url = match &self.url {
WheelWireSource::Url { url } => url,
WheelWireSource::Path { .. } | WheelWireSource::Filename { .. } => {
@ -3528,7 +3533,9 @@ impl Wheel {
url: FileLocation::AbsoluteUrl(file_url.clone()),
yanked: None,
});
let index = IndexUrl::from(VerbatimUrl::from_url(index_url.to_url()));
let index = IndexUrl::from(VerbatimUrl::from_url(
url.to_url().map_err(LockErrorKind::InvalidUrl)?,
));
Ok(RegistryBuiltWheel {
filename,
file,
@ -4094,11 +4101,11 @@ enum LockErrorKind {
},
/// An error that occurs when the URL to a file for a wheel or
/// source dist could not be converted to a structured `url::Url`.
#[error("Failed to parse wheel or source distribution URL")]
InvalidFileUrl(
#[error(transparent)]
InvalidUrl(
/// The underlying error that occurred. This includes the
/// errant URL in its error message.
#[source]
#[from]
ToUrlError,
),
/// An error that occurs when the extension can't be determined

View file

@ -303,7 +303,7 @@ impl std::fmt::Display for RequirementsTxtExport<'_> {
Source::Git(url, git) => {
// Remove the fragment and query from the URL; they're already present in the
// `GitSource`.
let mut url = url.to_url();
let mut url = url.to_url().map_err(|_| std::fmt::Error)?;
url.set_fragment(None);
url.set_query(None);
@ -325,7 +325,7 @@ impl std::fmt::Display for RequirementsTxtExport<'_> {
Source::Direct(url, direct) => {
let subdirectory = direct.subdirectory.as_ref().map(PathBuf::from);
let url = Url::from(ParsedArchiveUrl {
url: url.to_url(),
url: url.to_url().map_err(|_| std::fmt::Error)?,
subdirectory: subdirectory.clone(),
ext: DistExtension::Source(SourceDistExtension::TarGz),
});
@ -333,7 +333,11 @@ impl std::fmt::Display for RequirementsTxtExport<'_> {
}
Source::Path(path) | Source::Directory(path) => {
if path.is_absolute() {
write!(f, "{}", Url::from_file_path(path).unwrap())?;
write!(
f,
"{}",
Url::from_file_path(path).map_err(|()| std::fmt::Error)?
)?;
} else {
write!(f, "{}", anchor(path).portable_display())?;
}
@ -344,7 +348,11 @@ impl std::fmt::Display for RequirementsTxtExport<'_> {
}
EditableMode::NonEditable => {
if path.is_absolute() {
write!(f, "{}", Url::from_file_path(path).unwrap())?;
write!(
f,
"{}",
Url::from_file_path(path).map_err(|()| std::fmt::Error)?
)?;
} else {
write!(f, "{}", anchor(path).portable_display())?;
}