Add parsed URL fields to Dist variants (#3429)

Avoid reparsing urls by storing the parsed parts across resolution on
`Dist`.

Part 2 of https://github.com/astral-sh/uv/issues/3408 and part of #3409

Closes #3408
This commit is contained in:
konsti 2024-05-14 03:23:27 +02:00 committed by GitHub
parent 0010954ca7
commit c22c7cad4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 107 additions and 83 deletions

1
Cargo.lock generated
View file

@ -4896,6 +4896,7 @@ dependencies = [
"uv-distribution",
"uv-extract",
"uv-fs",
"uv-git",
"uv-interpreter",
"uv-normalize",
"uv-types",

View file

@ -41,7 +41,8 @@ use url::Url;
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use pep440_rs::Version;
use pep508_rs::{Pep508Url, Scheme, VerbatimUrl};
use pep508_rs::{Pep508Url, VerbatimUrl};
use uv_git::GitUrl;
use uv_normalize::PackageName;
pub use crate::annotation::*;
@ -126,9 +127,9 @@ impl std::fmt::Display for InstalledVersion<'_> {
}
}
/// Either a built distribution, a wheel, or a source distribution that exists at some location
/// Either a built distribution, a wheel, or a source distribution that exists at some location.
///
/// The location can be index, url or path (wheel) or index, url, path or git (source distribution)
/// The location can be an index, URL or path (wheel), or index, URL, path or Git repository (source distribution).
#[derive(Debug, Clone)]
pub enum Dist {
Built(BuiltDist),
@ -169,6 +170,10 @@ pub struct DirectUrlBuiltDist {
/// We require that wheel urls end in the full wheel filename, e.g.
/// `https://example.org/packages/flask-3.0.0-py3-none-any.whl`
pub filename: WheelFilename,
/// The URL without the subdirectory fragment.
pub location: Url,
pub subdirectory: Option<PathBuf>,
/// The URL with the subdirectory fragment.
pub url: VerbatimUrl,
}
@ -194,6 +199,10 @@ pub struct DirectUrlSourceDist {
/// Unlike [`DirectUrlBuiltDist`], we can't require a full filename with a version here, people
/// like using e.g. `foo @ https://github.com/org/repo/archive/master.zip`
pub name: PackageName,
/// The URL without the subdirectory fragment.
pub location: Url,
pub subdirectory: Option<PathBuf>,
/// The URL with the subdirectory fragment.
pub url: VerbatimUrl,
}
@ -201,6 +210,10 @@ pub struct DirectUrlSourceDist {
#[derive(Debug, Clone)]
pub struct GitSourceDist {
pub name: PackageName,
/// The URL without the revision and subdirectory fragment.
pub git: Box<GitUrl>,
pub subdirectory: Option<PathBuf>,
/// The URL with the revision and subdirectory fragment.
pub url: VerbatimUrl,
}
@ -244,7 +257,12 @@ impl Dist {
/// A remote built distribution (`.whl`) or source distribution from a `http://` or `https://`
/// URL.
pub fn from_http_url(name: PackageName, url: VerbatimUrl) -> Result<Dist, Error> {
pub fn from_http_url(
name: PackageName,
location: Url,
subdirectory: Option<PathBuf>,
url: VerbatimUrl,
) -> Result<Dist, Error> {
if Path::new(url.path())
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
@ -261,11 +279,15 @@ impl Dist {
Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename,
location,
subdirectory,
url,
})))
} else {
Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist {
name,
location,
subdirectory,
url,
})))
}
@ -274,15 +296,12 @@ impl Dist {
/// A local built or source distribution from a `file://` URL.
pub fn from_file_url(
name: PackageName,
url: VerbatimUrl,
path: &Path,
editable: bool,
url: VerbatimUrl,
) -> Result<Dist, Error> {
// Store the canonicalized path, which also serves to validate that it exists.
let path = match url
.to_file_path()
.map_err(|()| Error::UrlFilename(url.to_url()))?
.canonicalize()
{
let path = match path.canonicalize() {
Ok(path) => path,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Err(Error::NotFound(url.to_url()));
@ -335,67 +354,32 @@ impl Dist {
}
/// A remote source distribution from a `git+https://` or `git+ssh://` url.
pub fn from_git_url(name: PackageName, url: VerbatimUrl) -> Result<Dist, Error> {
Ok(Self::Source(SourceDist::Git(GitSourceDist { name, url })))
pub fn from_git_url(
name: PackageName,
url: VerbatimUrl,
git: GitUrl,
subdirectory: Option<PathBuf>,
) -> Result<Dist, Error> {
Ok(Self::Source(SourceDist::Git(GitSourceDist {
name,
git: Box::new(git),
subdirectory,
url,
})))
}
// TODO(konsti): We should carry the parsed URL through the codebase.
/// Create a [`Dist`] for a URL-based distribution.
pub fn from_url(name: PackageName, url: VerbatimParsedUrl) -> Result<Self, Error> {
match Scheme::parse(url.verbatim.scheme()) {
Some(Scheme::Http | Scheme::Https) => Self::from_http_url(name, url.verbatim),
Some(Scheme::File) => Self::from_file_url(name, url.verbatim, false),
Some(Scheme::GitSsh | Scheme::GitHttps) => Self::from_git_url(name, url.verbatim),
Some(Scheme::GitGit | Scheme::GitHttp) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"insecure Git protocol; use `git+https` or `git+ssh` instead".to_string(),
)),
Some(Scheme::GitFile) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"local Git protocol".to_string(),
)),
Some(
Scheme::BzrHttp
| Scheme::BzrHttps
| Scheme::BzrSsh
| Scheme::BzrSftp
| Scheme::BzrFtp
| Scheme::BzrLp
| Scheme::BzrFile,
) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Bazaar is not supported".to_string(),
)),
Some(
Scheme::HgFile
| Scheme::HgHttp
| Scheme::HgHttps
| Scheme::HgSsh
| Scheme::HgStaticHttp,
) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Mercurial is not supported".to_string(),
)),
Some(
Scheme::SvnSsh
| Scheme::SvnHttp
| Scheme::SvnHttps
| Scheme::SvnSvn
| Scheme::SvnFile,
) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Subversion is not supported".to_string(),
)),
None => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"unknown scheme".to_string(),
)),
match url.parsed_url {
ParsedUrl::Archive(archive) => {
Self::from_http_url(name, archive.url, archive.subdirectory, url.verbatim)
}
ParsedUrl::LocalFile(file) => {
Self::from_file_url(name, &file.path, false, url.verbatim)
}
ParsedUrl::Git(git) => {
Self::from_git_url(name, url.verbatim, git.url, git.subdirectory)
}
}
}
@ -1150,20 +1134,18 @@ mod test {
/// Ensure that we don't accidentally grow the `Dist` sizes.
#[test]
fn dist_size() {
// At time of writing, Unix is at 240, Windows is at 248.
assert!(
std::mem::size_of::<Dist>() <= 248,
std::mem::size_of::<Dist>() <= 336,
"{}",
std::mem::size_of::<Dist>()
);
assert!(
std::mem::size_of::<BuiltDist>() <= 248,
std::mem::size_of::<BuiltDist>() <= 336,
"{}",
std::mem::size_of::<BuiltDist>()
);
// At time of writing, unix is at 168, windows is at 176.
assert!(
std::mem::size_of::<SourceDist>() <= 176,
std::mem::size_of::<SourceDist>() <= 256,
"{}",
std::mem::size_of::<SourceDist>()
);

View file

@ -1,6 +1,7 @@
use std::str::FromStr;
use anyhow::Result;
use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist};
@ -20,6 +21,8 @@ async fn remote_metadata_with_and_without_cache() -> Result<()> {
let filename = WheelFilename::from_str(url.rsplit_once('/').unwrap().1)?;
let dist = BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename,
location: Url::parse(url).unwrap(),
subdirectory: None,
url: VerbatimUrl::from_str(url).unwrap(),
});
let metadata = client.wheel_metadata(&dist).await.unwrap();

View file

@ -1,11 +1,11 @@
use std::str::FromStr;
use anstream::println;
use anyhow::Result;
use anyhow::{bail, Result};
use clap::Parser;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist};
use distribution_types::{BuiltDist, DirectUrlBuiltDist, ParsedUrl};
use pep508_rs::VerbatimUrl;
use uv_cache::{Cache, CacheArgs};
use uv_client::RegistryClientBuilder;
@ -30,9 +30,15 @@ pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> Result<()> {
.1,
)?;
let ParsedUrl::Archive(archive) = ParsedUrl::try_from(args.url.to_url())? else {
bail!("Only HTTPS is supported");
};
let metadata = client
.wheel_metadata(&BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename,
location: archive.url,
subdirectory: archive.subdirectory,
url: args.url,
}))
.await?;

View file

@ -27,6 +27,7 @@ uv-configuration = { workspace = true }
uv-distribution = { workspace = true }
uv-extract = { workspace = true }
uv-fs = { workspace = true }
uv-git = { workspace = true }
uv-interpreter = { workspace = true }
uv-normalize = { workspace = true }
uv-types = { workspace = true }

View file

@ -21,6 +21,7 @@ use uv_distribution::{
BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex,
};
use uv_fs::Simplified;
use uv_git::GitUrl;
use uv_interpreter::PythonEnvironment;
use uv_types::HashStrategy;
@ -224,7 +225,11 @@ impl<'a> Planner<'a> {
continue;
}
}
RequirementSource::Url { url, .. } => {
RequirementSource::Url {
subdirectory,
location,
url,
} => {
// Check if we have a wheel or a source distribution.
if Path::new(url.path())
.extension()
@ -243,6 +248,8 @@ impl<'a> Planner<'a> {
let wheel = DirectUrlBuiltDist {
filename,
location: location.clone(),
subdirectory: subdirectory.clone(),
url: url.clone(),
};
@ -288,6 +295,8 @@ impl<'a> Planner<'a> {
} else {
let sdist = DirectUrlSourceDist {
name: requirement.name.clone(),
location: location.clone(),
subdirectory: subdirectory.clone(),
url: url.clone(),
};
// Find the most-compatible wheel from the cache, since we don't know
@ -300,9 +309,16 @@ impl<'a> Planner<'a> {
}
}
}
RequirementSource::Git { url, .. } => {
RequirementSource::Git {
repository,
reference,
subdirectory,
url,
} => {
let sdist = GitSourceDist {
name: requirement.name.clone(),
git: Box::new(GitUrl::new(repository.clone(), reference.clone())),
subdirectory: subdirectory.clone(),
url: url.clone(),
};
// Find the most-compatible wheel from the cache, since we don't know

View file

@ -13,6 +13,7 @@ use pep508_rs::MarkerEnvironment;
use pypi_types::Metadata23;
use uv_configuration::{Constraints, Overrides};
use uv_distribution::{DistributionDatabase, Reporter};
use uv_git::GitUrl;
use uv_resolver::{InMemoryIndex, MetadataResponse};
use uv_types::{BuildContext, HashStrategy, RequestedRequirements};
@ -162,16 +163,28 @@ impl<'a, Context: BuildContext> LookaheadResolver<'a, Context> {
// buildable distribution.
let dist = match requirement.source {
RequirementSource::Registry { .. } => return Ok(None),
RequirementSource::Url { url, .. } => Dist::from_http_url(requirement.name, url)?,
RequirementSource::Git { url, .. } => Dist::Source(SourceDist::Git(GitSourceDist {
RequirementSource::Url {
subdirectory,
location,
url,
} => Dist::from_http_url(requirement.name, location, subdirectory, url)?,
RequirementSource::Git {
repository,
reference,
subdirectory,
url,
} => Dist::Source(SourceDist::Git(GitSourceDist {
name: requirement.name,
git: Box::new(GitUrl::new(repository, reference)),
subdirectory,
url,
})),
RequirementSource::Path {
path: _,
path,
url,
// TODO(konsti): Figure out why we lose the editable here (does it matter?)
editable: _,
} => Dist::from_file_url(requirement.name, url, false)?,
} => Dist::from_file_url(requirement.name, &path, false, url)?,
};
// Fetch the metadata for the distribution.

View file

@ -238,9 +238,11 @@ impl<'a, Context: BuildContext> NamedRequirementsResolver<'a, Context> {
Some(Scheme::Http | Scheme::Https) => SourceUrl::Direct(DirectSourceUrl {
url: &requirement.url,
}),
Some(Scheme::GitSsh | Scheme::GitHttps) => SourceUrl::Git(GitSourceUrl {
url: &requirement.url,
}),
Some(Scheme::GitSsh | Scheme::GitHttps | Scheme::GitHttp) => {
SourceUrl::Git(GitSourceUrl {
url: &requirement.url,
})
}
_ => {
return Err(anyhow::anyhow!(
"Unsupported scheme for unnamed requirement: {}",