Add parsed URL fields to Dist variants (#3429)

Avoid reparsing urls by storing the parsed parts across resolution on
`Dist`.

Part 2 of https://github.com/astral-sh/uv/issues/3408 and part of #3409

Closes #3408
This commit is contained in:
konsti 2024-05-14 03:23:27 +02:00 committed by GitHub
parent 0010954ca7
commit c22c7cad4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 107 additions and 83 deletions

1
Cargo.lock generated
View file

@ -4896,6 +4896,7 @@ dependencies = [
"uv-distribution", "uv-distribution",
"uv-extract", "uv-extract",
"uv-fs", "uv-fs",
"uv-git",
"uv-interpreter", "uv-interpreter",
"uv-normalize", "uv-normalize",
"uv-types", "uv-types",

View file

@ -41,7 +41,8 @@ use url::Url;
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use pep440_rs::Version; use pep440_rs::Version;
use pep508_rs::{Pep508Url, Scheme, VerbatimUrl}; use pep508_rs::{Pep508Url, VerbatimUrl};
use uv_git::GitUrl;
use uv_normalize::PackageName; use uv_normalize::PackageName;
pub use crate::annotation::*; pub use crate::annotation::*;
@ -126,9 +127,9 @@ impl std::fmt::Display for InstalledVersion<'_> {
} }
} }
/// Either a built distribution, a wheel, or a source distribution that exists at some location /// Either a built distribution, a wheel, or a source distribution that exists at some location.
/// ///
/// The location can be index, url or path (wheel) or index, url, path or git (source distribution) /// The location can be an index, URL or path (wheel), or index, URL, path or Git repository (source distribution).
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum Dist { pub enum Dist {
Built(BuiltDist), Built(BuiltDist),
@ -169,6 +170,10 @@ pub struct DirectUrlBuiltDist {
/// We require that wheel urls end in the full wheel filename, e.g. /// We require that wheel urls end in the full wheel filename, e.g.
/// `https://example.org/packages/flask-3.0.0-py3-none-any.whl` /// `https://example.org/packages/flask-3.0.0-py3-none-any.whl`
pub filename: WheelFilename, pub filename: WheelFilename,
/// The URL without the subdirectory fragment.
pub location: Url,
pub subdirectory: Option<PathBuf>,
/// The URL with the subdirectory fragment.
pub url: VerbatimUrl, pub url: VerbatimUrl,
} }
@ -194,6 +199,10 @@ pub struct DirectUrlSourceDist {
/// Unlike [`DirectUrlBuiltDist`], we can't require a full filename with a version here, people /// Unlike [`DirectUrlBuiltDist`], we can't require a full filename with a version here, people
/// like using e.g. `foo @ https://github.com/org/repo/archive/master.zip` /// like using e.g. `foo @ https://github.com/org/repo/archive/master.zip`
pub name: PackageName, pub name: PackageName,
/// The URL without the subdirectory fragment.
pub location: Url,
pub subdirectory: Option<PathBuf>,
/// The URL with the subdirectory fragment.
pub url: VerbatimUrl, pub url: VerbatimUrl,
} }
@ -201,6 +210,10 @@ pub struct DirectUrlSourceDist {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct GitSourceDist { pub struct GitSourceDist {
pub name: PackageName, pub name: PackageName,
/// The URL without the revision and subdirectory fragment.
pub git: Box<GitUrl>,
pub subdirectory: Option<PathBuf>,
/// The URL with the revision and subdirectory fragment.
pub url: VerbatimUrl, pub url: VerbatimUrl,
} }
@ -244,7 +257,12 @@ impl Dist {
/// A remote built distribution (`.whl`) or source distribution from a `http://` or `https://` /// A remote built distribution (`.whl`) or source distribution from a `http://` or `https://`
/// URL. /// URL.
pub fn from_http_url(name: PackageName, url: VerbatimUrl) -> Result<Dist, Error> { pub fn from_http_url(
name: PackageName,
location: Url,
subdirectory: Option<PathBuf>,
url: VerbatimUrl,
) -> Result<Dist, Error> {
if Path::new(url.path()) if Path::new(url.path())
.extension() .extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
@ -261,11 +279,15 @@ impl Dist {
Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist { Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename, filename,
location,
subdirectory,
url, url,
}))) })))
} else { } else {
Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist { Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist {
name, name,
location,
subdirectory,
url, url,
}))) })))
} }
@ -274,15 +296,12 @@ impl Dist {
/// A local built or source distribution from a `file://` URL. /// A local built or source distribution from a `file://` URL.
pub fn from_file_url( pub fn from_file_url(
name: PackageName, name: PackageName,
url: VerbatimUrl, path: &Path,
editable: bool, editable: bool,
url: VerbatimUrl,
) -> Result<Dist, Error> { ) -> Result<Dist, Error> {
// Store the canonicalized path, which also serves to validate that it exists. // Store the canonicalized path, which also serves to validate that it exists.
let path = match url let path = match path.canonicalize() {
.to_file_path()
.map_err(|()| Error::UrlFilename(url.to_url()))?
.canonicalize()
{
Ok(path) => path, Ok(path) => path,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => { Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Err(Error::NotFound(url.to_url())); return Err(Error::NotFound(url.to_url()));
@ -335,67 +354,32 @@ impl Dist {
} }
/// A remote source distribution from a `git+https://` or `git+ssh://` url. /// A remote source distribution from a `git+https://` or `git+ssh://` url.
pub fn from_git_url(name: PackageName, url: VerbatimUrl) -> Result<Dist, Error> { pub fn from_git_url(
Ok(Self::Source(SourceDist::Git(GitSourceDist { name, url }))) name: PackageName,
url: VerbatimUrl,
git: GitUrl,
subdirectory: Option<PathBuf>,
) -> Result<Dist, Error> {
Ok(Self::Source(SourceDist::Git(GitSourceDist {
name,
git: Box::new(git),
subdirectory,
url,
})))
} }
// TODO(konsti): We should carry the parsed URL through the codebase.
/// Create a [`Dist`] for a URL-based distribution. /// Create a [`Dist`] for a URL-based distribution.
pub fn from_url(name: PackageName, url: VerbatimParsedUrl) -> Result<Self, Error> { pub fn from_url(name: PackageName, url: VerbatimParsedUrl) -> Result<Self, Error> {
match Scheme::parse(url.verbatim.scheme()) { match url.parsed_url {
Some(Scheme::Http | Scheme::Https) => Self::from_http_url(name, url.verbatim), ParsedUrl::Archive(archive) => {
Some(Scheme::File) => Self::from_file_url(name, url.verbatim, false), Self::from_http_url(name, archive.url, archive.subdirectory, url.verbatim)
Some(Scheme::GitSsh | Scheme::GitHttps) => Self::from_git_url(name, url.verbatim), }
Some(Scheme::GitGit | Scheme::GitHttp) => Err(Error::UnsupportedScheme( ParsedUrl::LocalFile(file) => {
url.verbatim.scheme().to_owned(), Self::from_file_url(name, &file.path, false, url.verbatim)
url.verbatim.verbatim().to_string(), }
"insecure Git protocol; use `git+https` or `git+ssh` instead".to_string(), ParsedUrl::Git(git) => {
)), Self::from_git_url(name, url.verbatim, git.url, git.subdirectory)
Some(Scheme::GitFile) => Err(Error::UnsupportedScheme( }
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"local Git protocol".to_string(),
)),
Some(
Scheme::BzrHttp
| Scheme::BzrHttps
| Scheme::BzrSsh
| Scheme::BzrSftp
| Scheme::BzrFtp
| Scheme::BzrLp
| Scheme::BzrFile,
) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Bazaar is not supported".to_string(),
)),
Some(
Scheme::HgFile
| Scheme::HgHttp
| Scheme::HgHttps
| Scheme::HgSsh
| Scheme::HgStaticHttp,
) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Mercurial is not supported".to_string(),
)),
Some(
Scheme::SvnSsh
| Scheme::SvnHttp
| Scheme::SvnHttps
| Scheme::SvnSvn
| Scheme::SvnFile,
) => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Subversion is not supported".to_string(),
)),
None => Err(Error::UnsupportedScheme(
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"unknown scheme".to_string(),
)),
} }
} }
@ -1150,20 +1134,18 @@ mod test {
/// Ensure that we don't accidentally grow the `Dist` sizes. /// Ensure that we don't accidentally grow the `Dist` sizes.
#[test] #[test]
fn dist_size() { fn dist_size() {
// At time of writing, Unix is at 240, Windows is at 248.
assert!( assert!(
std::mem::size_of::<Dist>() <= 248, std::mem::size_of::<Dist>() <= 336,
"{}", "{}",
std::mem::size_of::<Dist>() std::mem::size_of::<Dist>()
); );
assert!( assert!(
std::mem::size_of::<BuiltDist>() <= 248, std::mem::size_of::<BuiltDist>() <= 336,
"{}", "{}",
std::mem::size_of::<BuiltDist>() std::mem::size_of::<BuiltDist>()
); );
// At time of writing, unix is at 168, windows is at 176.
assert!( assert!(
std::mem::size_of::<SourceDist>() <= 176, std::mem::size_of::<SourceDist>() <= 256,
"{}", "{}",
std::mem::size_of::<SourceDist>() std::mem::size_of::<SourceDist>()
); );

View file

@ -1,6 +1,7 @@
use std::str::FromStr; use std::str::FromStr;
use anyhow::Result; use anyhow::Result;
use url::Url;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist}; use distribution_types::{BuiltDist, DirectUrlBuiltDist};
@ -20,6 +21,8 @@ async fn remote_metadata_with_and_without_cache() -> Result<()> {
let filename = WheelFilename::from_str(url.rsplit_once('/').unwrap().1)?; let filename = WheelFilename::from_str(url.rsplit_once('/').unwrap().1)?;
let dist = BuiltDist::DirectUrl(DirectUrlBuiltDist { let dist = BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename, filename,
location: Url::parse(url).unwrap(),
subdirectory: None,
url: VerbatimUrl::from_str(url).unwrap(), url: VerbatimUrl::from_str(url).unwrap(),
}); });
let metadata = client.wheel_metadata(&dist).await.unwrap(); let metadata = client.wheel_metadata(&dist).await.unwrap();

View file

@ -1,11 +1,11 @@
use std::str::FromStr; use std::str::FromStr;
use anstream::println; use anstream::println;
use anyhow::Result; use anyhow::{bail, Result};
use clap::Parser; use clap::Parser;
use distribution_filename::WheelFilename; use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist}; use distribution_types::{BuiltDist, DirectUrlBuiltDist, ParsedUrl};
use pep508_rs::VerbatimUrl; use pep508_rs::VerbatimUrl;
use uv_cache::{Cache, CacheArgs}; use uv_cache::{Cache, CacheArgs};
use uv_client::RegistryClientBuilder; use uv_client::RegistryClientBuilder;
@ -30,9 +30,15 @@ pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> Result<()> {
.1, .1,
)?; )?;
let ParsedUrl::Archive(archive) = ParsedUrl::try_from(args.url.to_url())? else {
bail!("Only HTTPS is supported");
};
let metadata = client let metadata = client
.wheel_metadata(&BuiltDist::DirectUrl(DirectUrlBuiltDist { .wheel_metadata(&BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename, filename,
location: archive.url,
subdirectory: archive.subdirectory,
url: args.url, url: args.url,
})) }))
.await?; .await?;

View file

@ -27,6 +27,7 @@ uv-configuration = { workspace = true }
uv-distribution = { workspace = true } uv-distribution = { workspace = true }
uv-extract = { workspace = true } uv-extract = { workspace = true }
uv-fs = { workspace = true } uv-fs = { workspace = true }
uv-git = { workspace = true }
uv-interpreter = { workspace = true } uv-interpreter = { workspace = true }
uv-normalize = { workspace = true } uv-normalize = { workspace = true }
uv-types = { workspace = true } uv-types = { workspace = true }

View file

@ -21,6 +21,7 @@ use uv_distribution::{
BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex, BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex,
}; };
use uv_fs::Simplified; use uv_fs::Simplified;
use uv_git::GitUrl;
use uv_interpreter::PythonEnvironment; use uv_interpreter::PythonEnvironment;
use uv_types::HashStrategy; use uv_types::HashStrategy;
@ -224,7 +225,11 @@ impl<'a> Planner<'a> {
continue; continue;
} }
} }
RequirementSource::Url { url, .. } => { RequirementSource::Url {
subdirectory,
location,
url,
} => {
// Check if we have a wheel or a source distribution. // Check if we have a wheel or a source distribution.
if Path::new(url.path()) if Path::new(url.path())
.extension() .extension()
@ -243,6 +248,8 @@ impl<'a> Planner<'a> {
let wheel = DirectUrlBuiltDist { let wheel = DirectUrlBuiltDist {
filename, filename,
location: location.clone(),
subdirectory: subdirectory.clone(),
url: url.clone(), url: url.clone(),
}; };
@ -288,6 +295,8 @@ impl<'a> Planner<'a> {
} else { } else {
let sdist = DirectUrlSourceDist { let sdist = DirectUrlSourceDist {
name: requirement.name.clone(), name: requirement.name.clone(),
location: location.clone(),
subdirectory: subdirectory.clone(),
url: url.clone(), url: url.clone(),
}; };
// Find the most-compatible wheel from the cache, since we don't know // Find the most-compatible wheel from the cache, since we don't know
@ -300,9 +309,16 @@ impl<'a> Planner<'a> {
} }
} }
} }
RequirementSource::Git { url, .. } => { RequirementSource::Git {
repository,
reference,
subdirectory,
url,
} => {
let sdist = GitSourceDist { let sdist = GitSourceDist {
name: requirement.name.clone(), name: requirement.name.clone(),
git: Box::new(GitUrl::new(repository.clone(), reference.clone())),
subdirectory: subdirectory.clone(),
url: url.clone(), url: url.clone(),
}; };
// Find the most-compatible wheel from the cache, since we don't know // Find the most-compatible wheel from the cache, since we don't know

View file

@ -13,6 +13,7 @@ use pep508_rs::MarkerEnvironment;
use pypi_types::Metadata23; use pypi_types::Metadata23;
use uv_configuration::{Constraints, Overrides}; use uv_configuration::{Constraints, Overrides};
use uv_distribution::{DistributionDatabase, Reporter}; use uv_distribution::{DistributionDatabase, Reporter};
use uv_git::GitUrl;
use uv_resolver::{InMemoryIndex, MetadataResponse}; use uv_resolver::{InMemoryIndex, MetadataResponse};
use uv_types::{BuildContext, HashStrategy, RequestedRequirements}; use uv_types::{BuildContext, HashStrategy, RequestedRequirements};
@ -162,16 +163,28 @@ impl<'a, Context: BuildContext> LookaheadResolver<'a, Context> {
// buildable distribution. // buildable distribution.
let dist = match requirement.source { let dist = match requirement.source {
RequirementSource::Registry { .. } => return Ok(None), RequirementSource::Registry { .. } => return Ok(None),
RequirementSource::Url { url, .. } => Dist::from_http_url(requirement.name, url)?, RequirementSource::Url {
RequirementSource::Git { url, .. } => Dist::Source(SourceDist::Git(GitSourceDist { subdirectory,
location,
url,
} => Dist::from_http_url(requirement.name, location, subdirectory, url)?,
RequirementSource::Git {
repository,
reference,
subdirectory,
url,
} => Dist::Source(SourceDist::Git(GitSourceDist {
name: requirement.name, name: requirement.name,
git: Box::new(GitUrl::new(repository, reference)),
subdirectory,
url, url,
})), })),
RequirementSource::Path { RequirementSource::Path {
path: _, path,
url, url,
// TODO(konsti): Figure out why we lose the editable here (does it matter?)
editable: _, editable: _,
} => Dist::from_file_url(requirement.name, url, false)?, } => Dist::from_file_url(requirement.name, &path, false, url)?,
}; };
// Fetch the metadata for the distribution. // Fetch the metadata for the distribution.

View file

@ -238,9 +238,11 @@ impl<'a, Context: BuildContext> NamedRequirementsResolver<'a, Context> {
Some(Scheme::Http | Scheme::Https) => SourceUrl::Direct(DirectSourceUrl { Some(Scheme::Http | Scheme::Https) => SourceUrl::Direct(DirectSourceUrl {
url: &requirement.url, url: &requirement.url,
}), }),
Some(Scheme::GitSsh | Scheme::GitHttps) => SourceUrl::Git(GitSourceUrl { Some(Scheme::GitSsh | Scheme::GitHttps | Scheme::GitHttp) => {
SourceUrl::Git(GitSourceUrl {
url: &requirement.url, url: &requirement.url,
}), })
}
_ => { _ => {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Unsupported scheme for unnamed requirement: {}", "Unsupported scheme for unnamed requirement: {}",