mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 13:25:00 +00:00
Avoid reparsing wheel URLs (#4947)
## Summary We currently store wheel URLs in an unparsed state because we don't have a stable parsed representation to use with rykv. Unfortunately this means we end up reparsing unnecessarily in a lot of places, especially when constructing a `Lock`. This PR adds a `UrlString` type that lets us avoid reparsing without losing the validity of the `Url`. ## Test Plan Shaves off another ~10 ms from https://github.com/astral-sh/uv/issues/4860. ``` ➜ transformers hyperfine "../../uv/target/profiling/uv lock" "../../uv/target/profiling/baseline lock" --warmup 3 Benchmark 1: ../../uv/target/profiling/uv lock Time (mean ± σ): 120.9 ms ± 2.5 ms [User: 126.0 ms, System: 80.6 ms] Range (min … max): 116.8 ms … 125.7 ms 23 runs Benchmark 2: ../../uv/target/profiling/baseline lock Time (mean ± σ): 129.9 ms ± 4.2 ms [User: 127.1 ms, System: 86.1 ms] Range (min … max): 123.4 ms … 141.2 ms 23 runs Summary ../../uv/target/profiling/uv lock ran 1.07 ± 0.04 times faster than ../../uv/target/profiling/baseline lock ```
This commit is contained in:
parent
23eb42deed
commit
d833910a5d
6 changed files with 84 additions and 44 deletions
|
@ -1,11 +1,11 @@
|
|||
use std::fmt::{Display, Formatter};
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Url;
|
||||
|
||||
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
|
||||
use pep508_rs::split_scheme;
|
||||
use pypi_types::{CoreMetadata, HashDigest, Yanked};
|
||||
|
||||
/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
|
||||
|
@ -56,10 +56,9 @@ impl File {
|
|||
.map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
|
||||
size: file.size,
|
||||
upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()),
|
||||
url: if split_scheme(&file.url).is_some() {
|
||||
FileLocation::AbsoluteUrl(file.url)
|
||||
} else {
|
||||
FileLocation::RelativeUrl(base.to_string(), file.url)
|
||||
url: match Url::parse(&file.url) {
|
||||
Ok(url) => FileLocation::AbsoluteUrl(url.into()),
|
||||
Err(_) => FileLocation::RelativeUrl(base.to_string(), file.url),
|
||||
},
|
||||
yanked: file.yanked,
|
||||
})
|
||||
|
@ -76,7 +75,7 @@ pub enum FileLocation {
|
|||
/// URL relative to the base URL.
|
||||
RelativeUrl(String, String),
|
||||
/// Absolute URL.
|
||||
AbsoluteUrl(String),
|
||||
AbsoluteUrl(UrlString),
|
||||
/// Absolute path to a file.
|
||||
Path(#[with(rkyv::with::AsString)] PathBuf),
|
||||
}
|
||||
|
@ -107,13 +106,7 @@ impl FileLocation {
|
|||
})?;
|
||||
Ok(joined)
|
||||
}
|
||||
FileLocation::AbsoluteUrl(ref absolute) => {
|
||||
let url = Url::parse(absolute).map_err(|err| ToUrlError::InvalidAbsolute {
|
||||
absolute: absolute.clone(),
|
||||
err,
|
||||
})?;
|
||||
Ok(url)
|
||||
}
|
||||
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.to_url()),
|
||||
FileLocation::Path(ref path) => {
|
||||
let path = path
|
||||
.to_str()
|
||||
|
@ -125,18 +118,79 @@ impl FileLocation {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert this location to a URL.
|
||||
///
|
||||
/// This method is identical to [`FileLocation::to_url`] except it avoids parsing absolute URLs
|
||||
/// as they are already guaranteed to be valid.
|
||||
pub fn to_url_string(&self) -> Result<UrlString, ToUrlError> {
|
||||
match *self {
|
||||
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.clone()),
|
||||
_ => Ok(self.to_url()?.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for FileLocation {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::RelativeUrl(_base, url) => Display::fmt(&url, f),
|
||||
Self::AbsoluteUrl(url) => Display::fmt(&url, f),
|
||||
Self::AbsoluteUrl(url) => Display::fmt(&url.0, f),
|
||||
Self::Path(path) => Display::fmt(&path.display(), f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A [`Url`] represented as a `String`.
|
||||
///
|
||||
/// This type is guaranteed to be a valid URL but avoids being parsed into the [`Url`] type.
|
||||
#[derive(
|
||||
Debug,
|
||||
Clone,
|
||||
Serialize,
|
||||
Deserialize,
|
||||
rkyv::Archive,
|
||||
rkyv::Deserialize,
|
||||
rkyv::Serialize,
|
||||
PartialEq,
|
||||
Eq,
|
||||
)]
|
||||
#[archive(check_bytes)]
|
||||
#[archive_attr(derive(Debug))]
|
||||
pub struct UrlString(String);
|
||||
|
||||
impl UrlString {
|
||||
/// Converts a [`UrlString`] to a [`Url`].
|
||||
pub fn to_url(&self) -> Url {
|
||||
// This conversion can never fail as the only way to construct a `UrlString` is from a `Url`.
|
||||
Url::from_str(&self.0).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for UrlString {
|
||||
fn as_ref(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Url> for UrlString {
|
||||
fn from(value: Url) -> Self {
|
||||
UrlString(value.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<UrlString> for String {
|
||||
fn from(value: UrlString) -> Self {
|
||||
value.0
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for UrlString {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
fmt::Display::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// An error that occurs when a `FileLocation` is not a valid URL.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
|
||||
pub enum ToUrlError {
|
||||
|
|
|
@ -421,7 +421,7 @@ impl RegistryClient {
|
|||
}
|
||||
}
|
||||
FileLocation::AbsoluteUrl(url) => {
|
||||
let url = Url::parse(url).map_err(ErrorKind::UrlParse)?;
|
||||
let url = url.to_url();
|
||||
if url.scheme() == "file" {
|
||||
let path = url
|
||||
.to_file_path()
|
||||
|
|
|
@ -160,9 +160,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
|
|||
FileLocation::RelativeUrl(base, url) => {
|
||||
pypi_types::base_url_join_relative(base, url)?
|
||||
}
|
||||
FileLocation::AbsoluteUrl(url) => {
|
||||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
||||
}
|
||||
FileLocation::AbsoluteUrl(url) => url.to_url(),
|
||||
FileLocation::Path(path) => {
|
||||
let cache_entry = self.build_context.cache().entry(
|
||||
CacheBucket::Wheels,
|
||||
|
|
|
@ -102,9 +102,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
|
|||
FileLocation::RelativeUrl(base, url) => {
|
||||
pypi_types::base_url_join_relative(base, url)?
|
||||
}
|
||||
FileLocation::AbsoluteUrl(url) => {
|
||||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
||||
}
|
||||
FileLocation::AbsoluteUrl(url) => url.to_url(),
|
||||
FileLocation::Path(path) => {
|
||||
let url = Url::from_file_path(path)
|
||||
.map_err(|()| Error::RelativePath(path.clone()))?;
|
||||
|
@ -280,9 +278,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
|
|||
FileLocation::RelativeUrl(base, url) => {
|
||||
pypi_types::base_url_join_relative(base, url)?
|
||||
}
|
||||
FileLocation::AbsoluteUrl(url) => {
|
||||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
||||
}
|
||||
FileLocation::AbsoluteUrl(url) => url.to_url(),
|
||||
FileLocation::Path(path) => {
|
||||
let url = Url::from_file_path(path)
|
||||
.map_err(|()| Error::RelativePath(path.clone()))?;
|
||||
|
|
|
@ -17,7 +17,7 @@ use distribution_filename::WheelFilename;
|
|||
use distribution_types::{
|
||||
BuiltDist, DirectUrlBuiltDist, DirectUrlSourceDist, DirectorySourceDist, Dist, FileLocation,
|
||||
GitSourceDist, IndexUrl, PathBuiltDist, PathSourceDist, RegistryBuiltDist, RegistryBuiltWheel,
|
||||
RegistrySourceDist, RemoteSource, Resolution, ResolvedDist, ToUrlError,
|
||||
RegistrySourceDist, RemoteSource, Resolution, ResolvedDist, ToUrlError, UrlString,
|
||||
};
|
||||
use pep440_rs::{Version, VersionSpecifiers};
|
||||
use pep508_rs::{
|
||||
|
@ -716,7 +716,7 @@ impl Distribution {
|
|||
requires_python: None,
|
||||
size: sdist.size(),
|
||||
upload_time_utc_ms: None,
|
||||
url: FileLocation::AbsoluteUrl(file_url.to_string()),
|
||||
url: FileLocation::AbsoluteUrl(file_url.clone().into()),
|
||||
yanked: None,
|
||||
});
|
||||
let index = IndexUrl::Url(VerbatimUrl::from_url(url.clone()));
|
||||
|
@ -1705,7 +1705,7 @@ struct Wheel {
|
|||
/// against was found. The location does not need to exist in the future,
|
||||
/// so this should be treated as only a hint to where to look and/or
|
||||
/// recording where the wheel file originally came from.
|
||||
url: Url,
|
||||
url: UrlString,
|
||||
/// A hash of the built distribution.
|
||||
///
|
||||
/// This is only present for wheels that come from registries and direct
|
||||
|
@ -1773,7 +1773,7 @@ impl Wheel {
|
|||
let url = wheel
|
||||
.file
|
||||
.url
|
||||
.to_url()
|
||||
.to_url_string()
|
||||
.map_err(LockErrorKind::InvalidFileUrl)
|
||||
.map_err(LockError::from)?;
|
||||
let hash = wheel.file.hashes.first().cloned().map(Hash::from);
|
||||
|
@ -1788,7 +1788,7 @@ impl Wheel {
|
|||
|
||||
fn from_direct_dist(direct_dist: &DirectUrlBuiltDist, hashes: &[HashDigest]) -> Wheel {
|
||||
Wheel {
|
||||
url: direct_dist.url.to_url(),
|
||||
url: direct_dist.url.to_url().into(),
|
||||
hash: hashes.first().cloned().map(Hash::from),
|
||||
size: None,
|
||||
filename: direct_dist.filename.clone(),
|
||||
|
@ -1797,7 +1797,7 @@ impl Wheel {
|
|||
|
||||
fn from_path_dist(path_dist: &PathBuiltDist, hashes: &[HashDigest]) -> Wheel {
|
||||
Wheel {
|
||||
url: path_dist.url.to_url(),
|
||||
url: path_dist.url.to_url().into(),
|
||||
hash: hashes.first().cloned().map(Hash::from),
|
||||
size: None,
|
||||
filename: path_dist.filename.clone(),
|
||||
|
@ -1813,7 +1813,7 @@ impl Wheel {
|
|||
requires_python: None,
|
||||
size: self.size,
|
||||
upload_time_utc_ms: None,
|
||||
url: FileLocation::AbsoluteUrl(self.url.to_string()),
|
||||
url: FileLocation::AbsoluteUrl(self.url.clone()),
|
||||
yanked: None,
|
||||
});
|
||||
let index = IndexUrl::Url(VerbatimUrl::from_url(url.clone()));
|
||||
|
@ -1872,7 +1872,7 @@ impl TryFrom<WheelWire> for Wheel {
|
|||
.map_err(|err| format!("failed to parse `{filename}` as wheel filename: {err}"))?;
|
||||
|
||||
Ok(Wheel {
|
||||
url: wire.url,
|
||||
url: wire.url.into(),
|
||||
hash: wire.hash,
|
||||
size: wire.size,
|
||||
filename,
|
||||
|
|
|
@ -19,17 +19,9 @@ Ok(
|
|||
sdist: None,
|
||||
wheels: [
|
||||
Wheel {
|
||||
url: Url {
|
||||
scheme: "file",
|
||||
cannot_be_a_base: false,
|
||||
username: "",
|
||||
password: None,
|
||||
host: None,
|
||||
port: None,
|
||||
path: "/foo/bar/anyio-4.3.0-py3-none-any.whl",
|
||||
query: None,
|
||||
fragment: None,
|
||||
},
|
||||
url: UrlString(
|
||||
"file:///foo/bar/anyio-4.3.0-py3-none-any.whl",
|
||||
),
|
||||
hash: Some(
|
||||
Hash(
|
||||
HashDigest {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue