mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 21:35:00 +00:00
Avoid reparsing wheel URLs (#4947)
## Summary We currently store wheel URLs in an unparsed state because we don't have a stable parsed representation to use with rykv. Unfortunately this means we end up reparsing unnecessarily in a lot of places, especially when constructing a `Lock`. This PR adds a `UrlString` type that lets us avoid reparsing without losing the validity of the `Url`. ## Test Plan Shaves off another ~10 ms from https://github.com/astral-sh/uv/issues/4860. ``` ➜ transformers hyperfine "../../uv/target/profiling/uv lock" "../../uv/target/profiling/baseline lock" --warmup 3 Benchmark 1: ../../uv/target/profiling/uv lock Time (mean ± σ): 120.9 ms ± 2.5 ms [User: 126.0 ms, System: 80.6 ms] Range (min … max): 116.8 ms … 125.7 ms 23 runs Benchmark 2: ../../uv/target/profiling/baseline lock Time (mean ± σ): 129.9 ms ± 4.2 ms [User: 127.1 ms, System: 86.1 ms] Range (min … max): 123.4 ms … 141.2 ms 23 runs Summary ../../uv/target/profiling/uv lock ran 1.07 ± 0.04 times faster than ../../uv/target/profiling/baseline lock ```
This commit is contained in:
parent
23eb42deed
commit
d833910a5d
6 changed files with 84 additions and 44 deletions
|
@ -1,11 +1,11 @@
|
||||||
use std::fmt::{Display, Formatter};
|
use std::fmt::{self, Display, Formatter};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
|
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
|
||||||
use pep508_rs::split_scheme;
|
|
||||||
use pypi_types::{CoreMetadata, HashDigest, Yanked};
|
use pypi_types::{CoreMetadata, HashDigest, Yanked};
|
||||||
|
|
||||||
/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
|
/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
|
||||||
|
@ -56,10 +56,9 @@ impl File {
|
||||||
.map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
|
.map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
|
||||||
size: file.size,
|
size: file.size,
|
||||||
upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()),
|
upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()),
|
||||||
url: if split_scheme(&file.url).is_some() {
|
url: match Url::parse(&file.url) {
|
||||||
FileLocation::AbsoluteUrl(file.url)
|
Ok(url) => FileLocation::AbsoluteUrl(url.into()),
|
||||||
} else {
|
Err(_) => FileLocation::RelativeUrl(base.to_string(), file.url),
|
||||||
FileLocation::RelativeUrl(base.to_string(), file.url)
|
|
||||||
},
|
},
|
||||||
yanked: file.yanked,
|
yanked: file.yanked,
|
||||||
})
|
})
|
||||||
|
@ -76,7 +75,7 @@ pub enum FileLocation {
|
||||||
/// URL relative to the base URL.
|
/// URL relative to the base URL.
|
||||||
RelativeUrl(String, String),
|
RelativeUrl(String, String),
|
||||||
/// Absolute URL.
|
/// Absolute URL.
|
||||||
AbsoluteUrl(String),
|
AbsoluteUrl(UrlString),
|
||||||
/// Absolute path to a file.
|
/// Absolute path to a file.
|
||||||
Path(#[with(rkyv::with::AsString)] PathBuf),
|
Path(#[with(rkyv::with::AsString)] PathBuf),
|
||||||
}
|
}
|
||||||
|
@ -107,13 +106,7 @@ impl FileLocation {
|
||||||
})?;
|
})?;
|
||||||
Ok(joined)
|
Ok(joined)
|
||||||
}
|
}
|
||||||
FileLocation::AbsoluteUrl(ref absolute) => {
|
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.to_url()),
|
||||||
let url = Url::parse(absolute).map_err(|err| ToUrlError::InvalidAbsolute {
|
|
||||||
absolute: absolute.clone(),
|
|
||||||
err,
|
|
||||||
})?;
|
|
||||||
Ok(url)
|
|
||||||
}
|
|
||||||
FileLocation::Path(ref path) => {
|
FileLocation::Path(ref path) => {
|
||||||
let path = path
|
let path = path
|
||||||
.to_str()
|
.to_str()
|
||||||
|
@ -125,18 +118,79 @@ impl FileLocation {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert this location to a URL.
|
||||||
|
///
|
||||||
|
/// This method is identical to [`FileLocation::to_url`] except it avoids parsing absolute URLs
|
||||||
|
/// as they are already guaranteed to be valid.
|
||||||
|
pub fn to_url_string(&self) -> Result<UrlString, ToUrlError> {
|
||||||
|
match *self {
|
||||||
|
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.clone()),
|
||||||
|
_ => Ok(self.to_url()?.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for FileLocation {
|
impl Display for FileLocation {
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::RelativeUrl(_base, url) => Display::fmt(&url, f),
|
Self::RelativeUrl(_base, url) => Display::fmt(&url, f),
|
||||||
Self::AbsoluteUrl(url) => Display::fmt(&url, f),
|
Self::AbsoluteUrl(url) => Display::fmt(&url.0, f),
|
||||||
Self::Path(path) => Display::fmt(&path.display(), f),
|
Self::Path(path) => Display::fmt(&path.display(), f),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A [`Url`] represented as a `String`.
|
||||||
|
///
|
||||||
|
/// This type is guaranteed to be a valid URL but avoids being parsed into the [`Url`] type.
|
||||||
|
#[derive(
|
||||||
|
Debug,
|
||||||
|
Clone,
|
||||||
|
Serialize,
|
||||||
|
Deserialize,
|
||||||
|
rkyv::Archive,
|
||||||
|
rkyv::Deserialize,
|
||||||
|
rkyv::Serialize,
|
||||||
|
PartialEq,
|
||||||
|
Eq,
|
||||||
|
)]
|
||||||
|
#[archive(check_bytes)]
|
||||||
|
#[archive_attr(derive(Debug))]
|
||||||
|
pub struct UrlString(String);
|
||||||
|
|
||||||
|
impl UrlString {
|
||||||
|
/// Converts a [`UrlString`] to a [`Url`].
|
||||||
|
pub fn to_url(&self) -> Url {
|
||||||
|
// This conversion can never fail as the only way to construct a `UrlString` is from a `Url`.
|
||||||
|
Url::from_str(&self.0).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsRef<str> for UrlString {
|
||||||
|
fn as_ref(&self) -> &str {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Url> for UrlString {
|
||||||
|
fn from(value: Url) -> Self {
|
||||||
|
UrlString(value.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<UrlString> for String {
|
||||||
|
fn from(value: UrlString) -> Self {
|
||||||
|
value.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for UrlString {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
|
fmt::Display::fmt(&self.0, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// An error that occurs when a `FileLocation` is not a valid URL.
|
/// An error that occurs when a `FileLocation` is not a valid URL.
|
||||||
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
|
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
|
||||||
pub enum ToUrlError {
|
pub enum ToUrlError {
|
||||||
|
|
|
@ -421,7 +421,7 @@ impl RegistryClient {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FileLocation::AbsoluteUrl(url) => {
|
FileLocation::AbsoluteUrl(url) => {
|
||||||
let url = Url::parse(url).map_err(ErrorKind::UrlParse)?;
|
let url = url.to_url();
|
||||||
if url.scheme() == "file" {
|
if url.scheme() == "file" {
|
||||||
let path = url
|
let path = url
|
||||||
.to_file_path()
|
.to_file_path()
|
||||||
|
|
|
@ -160,9 +160,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
|
||||||
FileLocation::RelativeUrl(base, url) => {
|
FileLocation::RelativeUrl(base, url) => {
|
||||||
pypi_types::base_url_join_relative(base, url)?
|
pypi_types::base_url_join_relative(base, url)?
|
||||||
}
|
}
|
||||||
FileLocation::AbsoluteUrl(url) => {
|
FileLocation::AbsoluteUrl(url) => url.to_url(),
|
||||||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
|
||||||
}
|
|
||||||
FileLocation::Path(path) => {
|
FileLocation::Path(path) => {
|
||||||
let cache_entry = self.build_context.cache().entry(
|
let cache_entry = self.build_context.cache().entry(
|
||||||
CacheBucket::Wheels,
|
CacheBucket::Wheels,
|
||||||
|
|
|
@ -102,9 +102,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
|
||||||
FileLocation::RelativeUrl(base, url) => {
|
FileLocation::RelativeUrl(base, url) => {
|
||||||
pypi_types::base_url_join_relative(base, url)?
|
pypi_types::base_url_join_relative(base, url)?
|
||||||
}
|
}
|
||||||
FileLocation::AbsoluteUrl(url) => {
|
FileLocation::AbsoluteUrl(url) => url.to_url(),
|
||||||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
|
||||||
}
|
|
||||||
FileLocation::Path(path) => {
|
FileLocation::Path(path) => {
|
||||||
let url = Url::from_file_path(path)
|
let url = Url::from_file_path(path)
|
||||||
.map_err(|()| Error::RelativePath(path.clone()))?;
|
.map_err(|()| Error::RelativePath(path.clone()))?;
|
||||||
|
@ -280,9 +278,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
|
||||||
FileLocation::RelativeUrl(base, url) => {
|
FileLocation::RelativeUrl(base, url) => {
|
||||||
pypi_types::base_url_join_relative(base, url)?
|
pypi_types::base_url_join_relative(base, url)?
|
||||||
}
|
}
|
||||||
FileLocation::AbsoluteUrl(url) => {
|
FileLocation::AbsoluteUrl(url) => url.to_url(),
|
||||||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
|
||||||
}
|
|
||||||
FileLocation::Path(path) => {
|
FileLocation::Path(path) => {
|
||||||
let url = Url::from_file_path(path)
|
let url = Url::from_file_path(path)
|
||||||
.map_err(|()| Error::RelativePath(path.clone()))?;
|
.map_err(|()| Error::RelativePath(path.clone()))?;
|
||||||
|
|
|
@ -17,7 +17,7 @@ use distribution_filename::WheelFilename;
|
||||||
use distribution_types::{
|
use distribution_types::{
|
||||||
BuiltDist, DirectUrlBuiltDist, DirectUrlSourceDist, DirectorySourceDist, Dist, FileLocation,
|
BuiltDist, DirectUrlBuiltDist, DirectUrlSourceDist, DirectorySourceDist, Dist, FileLocation,
|
||||||
GitSourceDist, IndexUrl, PathBuiltDist, PathSourceDist, RegistryBuiltDist, RegistryBuiltWheel,
|
GitSourceDist, IndexUrl, PathBuiltDist, PathSourceDist, RegistryBuiltDist, RegistryBuiltWheel,
|
||||||
RegistrySourceDist, RemoteSource, Resolution, ResolvedDist, ToUrlError,
|
RegistrySourceDist, RemoteSource, Resolution, ResolvedDist, ToUrlError, UrlString,
|
||||||
};
|
};
|
||||||
use pep440_rs::{Version, VersionSpecifiers};
|
use pep440_rs::{Version, VersionSpecifiers};
|
||||||
use pep508_rs::{
|
use pep508_rs::{
|
||||||
|
@ -716,7 +716,7 @@ impl Distribution {
|
||||||
requires_python: None,
|
requires_python: None,
|
||||||
size: sdist.size(),
|
size: sdist.size(),
|
||||||
upload_time_utc_ms: None,
|
upload_time_utc_ms: None,
|
||||||
url: FileLocation::AbsoluteUrl(file_url.to_string()),
|
url: FileLocation::AbsoluteUrl(file_url.clone().into()),
|
||||||
yanked: None,
|
yanked: None,
|
||||||
});
|
});
|
||||||
let index = IndexUrl::Url(VerbatimUrl::from_url(url.clone()));
|
let index = IndexUrl::Url(VerbatimUrl::from_url(url.clone()));
|
||||||
|
@ -1705,7 +1705,7 @@ struct Wheel {
|
||||||
/// against was found. The location does not need to exist in the future,
|
/// against was found. The location does not need to exist in the future,
|
||||||
/// so this should be treated as only a hint to where to look and/or
|
/// so this should be treated as only a hint to where to look and/or
|
||||||
/// recording where the wheel file originally came from.
|
/// recording where the wheel file originally came from.
|
||||||
url: Url,
|
url: UrlString,
|
||||||
/// A hash of the built distribution.
|
/// A hash of the built distribution.
|
||||||
///
|
///
|
||||||
/// This is only present for wheels that come from registries and direct
|
/// This is only present for wheels that come from registries and direct
|
||||||
|
@ -1773,7 +1773,7 @@ impl Wheel {
|
||||||
let url = wheel
|
let url = wheel
|
||||||
.file
|
.file
|
||||||
.url
|
.url
|
||||||
.to_url()
|
.to_url_string()
|
||||||
.map_err(LockErrorKind::InvalidFileUrl)
|
.map_err(LockErrorKind::InvalidFileUrl)
|
||||||
.map_err(LockError::from)?;
|
.map_err(LockError::from)?;
|
||||||
let hash = wheel.file.hashes.first().cloned().map(Hash::from);
|
let hash = wheel.file.hashes.first().cloned().map(Hash::from);
|
||||||
|
@ -1788,7 +1788,7 @@ impl Wheel {
|
||||||
|
|
||||||
fn from_direct_dist(direct_dist: &DirectUrlBuiltDist, hashes: &[HashDigest]) -> Wheel {
|
fn from_direct_dist(direct_dist: &DirectUrlBuiltDist, hashes: &[HashDigest]) -> Wheel {
|
||||||
Wheel {
|
Wheel {
|
||||||
url: direct_dist.url.to_url(),
|
url: direct_dist.url.to_url().into(),
|
||||||
hash: hashes.first().cloned().map(Hash::from),
|
hash: hashes.first().cloned().map(Hash::from),
|
||||||
size: None,
|
size: None,
|
||||||
filename: direct_dist.filename.clone(),
|
filename: direct_dist.filename.clone(),
|
||||||
|
@ -1797,7 +1797,7 @@ impl Wheel {
|
||||||
|
|
||||||
fn from_path_dist(path_dist: &PathBuiltDist, hashes: &[HashDigest]) -> Wheel {
|
fn from_path_dist(path_dist: &PathBuiltDist, hashes: &[HashDigest]) -> Wheel {
|
||||||
Wheel {
|
Wheel {
|
||||||
url: path_dist.url.to_url(),
|
url: path_dist.url.to_url().into(),
|
||||||
hash: hashes.first().cloned().map(Hash::from),
|
hash: hashes.first().cloned().map(Hash::from),
|
||||||
size: None,
|
size: None,
|
||||||
filename: path_dist.filename.clone(),
|
filename: path_dist.filename.clone(),
|
||||||
|
@ -1813,7 +1813,7 @@ impl Wheel {
|
||||||
requires_python: None,
|
requires_python: None,
|
||||||
size: self.size,
|
size: self.size,
|
||||||
upload_time_utc_ms: None,
|
upload_time_utc_ms: None,
|
||||||
url: FileLocation::AbsoluteUrl(self.url.to_string()),
|
url: FileLocation::AbsoluteUrl(self.url.clone()),
|
||||||
yanked: None,
|
yanked: None,
|
||||||
});
|
});
|
||||||
let index = IndexUrl::Url(VerbatimUrl::from_url(url.clone()));
|
let index = IndexUrl::Url(VerbatimUrl::from_url(url.clone()));
|
||||||
|
@ -1872,7 +1872,7 @@ impl TryFrom<WheelWire> for Wheel {
|
||||||
.map_err(|err| format!("failed to parse `{filename}` as wheel filename: {err}"))?;
|
.map_err(|err| format!("failed to parse `{filename}` as wheel filename: {err}"))?;
|
||||||
|
|
||||||
Ok(Wheel {
|
Ok(Wheel {
|
||||||
url: wire.url,
|
url: wire.url.into(),
|
||||||
hash: wire.hash,
|
hash: wire.hash,
|
||||||
size: wire.size,
|
size: wire.size,
|
||||||
filename,
|
filename,
|
||||||
|
|
|
@ -19,17 +19,9 @@ Ok(
|
||||||
sdist: None,
|
sdist: None,
|
||||||
wheels: [
|
wheels: [
|
||||||
Wheel {
|
Wheel {
|
||||||
url: Url {
|
url: UrlString(
|
||||||
scheme: "file",
|
"file:///foo/bar/anyio-4.3.0-py3-none-any.whl",
|
||||||
cannot_be_a_base: false,
|
),
|
||||||
username: "",
|
|
||||||
password: None,
|
|
||||||
host: None,
|
|
||||||
port: None,
|
|
||||||
path: "/foo/bar/anyio-4.3.0-py3-none-any.whl",
|
|
||||||
query: None,
|
|
||||||
fragment: None,
|
|
||||||
},
|
|
||||||
hash: Some(
|
hash: Some(
|
||||||
Hash(
|
Hash(
|
||||||
HashDigest {
|
HashDigest {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue