Add parsed URL to PubGrubPackage (#3426)

Avoid reparsing urls by storing the parsed parts across resolution on
`PubGrubPackage`.

Part 1 of #3408
This commit is contained in:
konsti 2024-05-14 02:55:21 +02:00 committed by GitHub
parent 5132c6a6e2
commit 0010954ca7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 194 additions and 79 deletions

View file

@ -25,12 +25,13 @@ pypi-types = { workspace = true }
requirements-txt = { workspace = true }
uv-cache = { workspace = true }
uv-client = { workspace = true }
uv-configuration = { workspace = true }
uv-distribution = { workspace = true }
uv-git = { workspace = true }
uv-interpreter = { workspace = true }
uv-normalize = { workspace = true }
uv-types = { workspace = true }
uv-warnings = { workspace = true }
uv-configuration = { workspace = true }
anstream = { workspace = true }
anyhow = { workspace = true }

View file

@ -235,10 +235,10 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(expected, url) {
if !Urls::is_allowed(&expected.verbatim, url) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim().to_string(),
expected.verbatim.verbatim().to_string(),
url.verbatim().to_string(),
));
}
@ -260,10 +260,10 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(expected, url) {
if !Urls::is_allowed(&expected.verbatim, url) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim().to_string(),
expected.verbatim.verbatim().to_string(),
url.verbatim().to_string(),
));
}
@ -285,10 +285,10 @@ impl PubGrubRequirement {
));
};
if !Urls::is_allowed(expected, url) {
if !Urls::is_allowed(&expected.verbatim, url) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim().to_string(),
expected.verbatim.verbatim().to_string(),
url.verbatim().to_string(),
));
}

View file

@ -1,12 +1,11 @@
use distribution_types::{DistributionMetadata, Name, VersionOrUrlRef};
use distribution_types::{DistributionMetadata, Name, VerbatimParsedUrl, VersionOrUrlRef};
use pep440_rs::Version;
use pep508_rs::VerbatimUrl;
use uv_normalize::PackageName;
#[derive(Debug)]
pub(crate) enum PubGrubDistribution<'a> {
Registry(&'a PackageName, &'a Version),
Url(&'a PackageName, &'a VerbatimUrl),
Url(&'a PackageName, &'a VerbatimParsedUrl),
}
impl<'a> PubGrubDistribution<'a> {
@ -14,7 +13,7 @@ impl<'a> PubGrubDistribution<'a> {
Self::Registry(name, version)
}
pub(crate) fn from_url(name: &'a PackageName, url: &'a VerbatimUrl) -> Self {
pub(crate) fn from_url(name: &'a PackageName, url: &'a VerbatimParsedUrl) -> Self {
Self::Url(name, url)
}
}
@ -32,7 +31,7 @@ impl DistributionMetadata for PubGrubDistribution<'_> {
fn version_or_url(&self) -> VersionOrUrlRef {
match self {
Self::Registry(_, version) => VersionOrUrlRef::Version(version),
Self::Url(_, url) => VersionOrUrlRef::Url(url),
Self::Url(_, url) => VersionOrUrlRef::Url(&url.verbatim),
}
}
}

View file

@ -1,6 +1,6 @@
use derivative::Derivative;
use pep508_rs::VerbatimUrl;
use distribution_types::VerbatimParsedUrl;
use uv_normalize::{ExtraName, PackageName};
use crate::resolver::Urls;
@ -59,7 +59,7 @@ pub enum PubGrubPackage {
/// we're going to have a dependency that's provided as a URL, we _need_ to visit the URL
/// version before the registry version. So we could just error if we visit a URL variant
/// _after_ a registry variant.
Option<VerbatimUrl>,
Option<VerbatimParsedUrl>,
),
/// A proxy package to represent a dependency with an extra (e.g., `black[colorama]`).
///
@ -74,7 +74,7 @@ pub enum PubGrubPackage {
/// the exact same version of the base variant. Without the proxy package, then when provided
/// requirements like `black==23.0.1` and `black[colorama]`, PubGrub may attempt to retrieve
/// metadata for `black[colorama]` versions other than `23.0.1`.
Extra(PackageName, ExtraName, Option<VerbatimUrl>),
Extra(PackageName, ExtraName, Option<VerbatimParsedUrl>),
}
impl PubGrubPackage {

View file

@ -1,10 +1,13 @@
use url::Url;
use distribution_types::{ParsedGitUrl, ParsedUrl, VerbatimParsedUrl};
use pep508_rs::VerbatimUrl;
use uv_distribution::git_url_to_precise;
use uv_git::{GitReference, GitUrl};
/// Given a [`VerbatimUrl`] and a redirect, apply the redirect to the URL while preserving as much
/// of the verbatim representation as possible.
pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
let redirect = VerbatimUrl::from_url(redirect);
// The redirect should be the "same" URL, but with a specific commit hash added after the `@`.
@ -36,9 +39,53 @@ pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
redirect
}
pub(crate) fn url_to_precise(url: VerbatimParsedUrl) -> VerbatimParsedUrl {
let ParsedUrl::Git(ParsedGitUrl {
url: git_url,
subdirectory,
}) = url.parsed_url.clone()
else {
return url;
};
// TODO(konsti): Remove once we carry more context on the `Dist`s.
let lowered_git_ref = git_url
.reference()
.as_str()
.map_or(GitReference::DefaultBranch, |rev| {
GitReference::from_rev(rev)
});
let git_url = GitUrl::new(git_url.repository().clone(), lowered_git_ref);
let Some(new_git_url) = git_url_to_precise(git_url.clone()) else {
debug_assert!(
matches!(git_url.reference(), GitReference::FullCommit(_)),
"Unseen git url: {}, {:?}",
url.verbatim,
git_url
);
return url;
};
let new_parsed_url = ParsedGitUrl {
url: new_git_url,
subdirectory,
};
let new_url = Url::from(new_parsed_url.clone());
let new_verbatim_url = apply_redirect(&url.verbatim, new_url);
VerbatimParsedUrl {
parsed_url: ParsedUrl::Git(new_parsed_url),
verbatim: new_verbatim_url,
}
}
#[cfg(test)]
mod tests {
use super::*;
use url::Url;
use pep508_rs::VerbatimUrl;
use crate::redirect::apply_redirect;
#[test]
fn test_apply_redirect() -> Result<(), url::ParseError> {

View file

@ -21,7 +21,6 @@ use once_map::OnceMap;
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use pypi_types::HashDigest;
use uv_distribution::to_precise;
use uv_normalize::{ExtraName, PackageName};
use crate::dependency_provider::UvDependencyProvider;
@ -30,7 +29,7 @@ use crate::lock::{self, Lock, LockError};
use crate::pins::FilePins;
use crate::preferences::Preferences;
use crate::pubgrub::{PubGrubDistribution, PubGrubPackage};
use crate::redirect::apply_redirect;
use crate::redirect::url_to_precise;
use crate::resolver::{InMemoryIndex, MetadataResponse, VersionsResponse};
use crate::{Manifest, ResolveError};
@ -129,9 +128,7 @@ impl ResolutionGraph {
{
Dist::from_editable(package_name.clone(), editable.clone())?
} else {
let url = to_precise(url)
.map_or_else(|| url.clone(), |precise| apply_redirect(url, precise));
Dist::from_url(package_name.clone(), url)?
Dist::from_url(package_name.clone(), url_to_precise(url.clone()))?
};
// Add its hashes to the index, preserving those that were already present in
@ -249,11 +246,8 @@ impl ResolutionGraph {
.or_insert_with(Vec::new)
.push(extra.clone());
} else {
let url = to_precise(url).map_or_else(
|| url.clone(),
|precise| apply_redirect(url, precise),
);
let pinned_package = Dist::from_url(package_name.clone(), url)?;
let pinned_package =
Dist::from_url(package_name.clone(), url_to_precise(url.clone()))?;
diagnostics.push(Diagnostic::MissingExtra {
dist: pinned_package.into(),

View file

@ -612,7 +612,7 @@ impl<'a, Provider: ResolverProvider, InstalledPackages: InstalledPackagesProvide
}
PubGrubPackage::Package(name, _extra, Some(url)) => {
// Verify that the package is allowed under the hash-checking policy.
if !self.hasher.allows_url(url) {
if !self.hasher.allows_url(&url.verbatim) {
return Err(ResolveError::UnhashedPackage(name.clone()));
}
@ -685,7 +685,10 @@ impl<'a, Provider: ResolverProvider, InstalledPackages: InstalledPackagesProvide
PubGrubPackage::Extra(package_name, _, Some(url))
| PubGrubPackage::Package(package_name, _, Some(url)) => {
debug!("Searching for a compatible version of {package} @ {url} ({range})");
debug!(
"Searching for a compatible version of {package} @ {} ({range})",
url.verbatim
);
// If the dist is an editable, return the version from the editable metadata.
if let Some((_local, metadata, _)) = self.editables.get(package_name) {
@ -1373,7 +1376,7 @@ impl<'a, Provider: ResolverProvider, InstalledPackages: InstalledPackagesProvide
PubGrubPackage::Python(_) => {}
PubGrubPackage::Extra(_, _, _) => {}
PubGrubPackage::Package(package_name, _extra, Some(url)) => {
reporter.on_progress(package_name, &VersionOrUrlRef::Url(url));
reporter.on_progress(package_name, &VersionOrUrlRef::Url(&url.verbatim));
}
PubGrubPackage::Package(package_name, _extra, None) => {
reporter.on_progress(package_name, &VersionOrUrlRef::Version(version));

View file

@ -1,16 +1,20 @@
use rustc_hash::FxHashMap;
use tracing::debug;
use distribution_types::{RequirementSource, Verbatim};
use distribution_types::{
ParsedArchiveUrl, ParsedGitUrl, ParsedLocalFileUrl, ParsedUrl, RequirementSource, Verbatim,
VerbatimParsedUrl,
};
use pep508_rs::{MarkerEnvironment, VerbatimUrl};
use uv_distribution::is_same_reference;
use uv_git::GitUrl;
use uv_normalize::PackageName;
use crate::{DependencyMode, Manifest, ResolveError};
/// A map of package names to their associated, required URLs.
#[derive(Debug, Default)]
pub(crate) struct Urls(FxHashMap<PackageName, VerbatimUrl>);
pub(crate) struct Urls(FxHashMap<PackageName, VerbatimParsedUrl>);
impl Urls {
pub(crate) fn from_manifest(
@ -18,19 +22,30 @@ impl Urls {
markers: Option<&MarkerEnvironment>,
dependencies: DependencyMode,
) -> Result<Self, ResolveError> {
let mut urls: FxHashMap<PackageName, VerbatimUrl> = FxHashMap::default();
let mut urls: FxHashMap<PackageName, VerbatimParsedUrl> = FxHashMap::default();
// Add the editables themselves to the list of required URLs.
for (editable, metadata, _) in &manifest.editables {
if let Some(previous) = urls.insert(metadata.name.clone(), editable.url.clone()) {
if !is_equal(&previous, &editable.url) {
if is_same_reference(&previous, &editable.url) {
debug!("Allowing {} as a variant of {previous}", editable.url);
let editable_url = VerbatimParsedUrl {
parsed_url: ParsedUrl::LocalFile(ParsedLocalFileUrl {
url: editable.url.to_url(),
path: editable.path.clone(),
editable: true,
}),
verbatim: editable.url.clone(),
};
if let Some(previous) = urls.insert(metadata.name.clone(), editable_url.clone()) {
if !is_equal(&previous.verbatim, &editable_url.verbatim) {
if is_same_reference(&previous.verbatim, &editable_url.verbatim) {
debug!(
"Allowing {} as a variant of {}",
editable_url.verbatim, previous.verbatim
);
} else {
return Err(ResolveError::ConflictingUrlsDirect(
metadata.name.clone(),
previous.verbatim().to_string(),
editable.verbatim().to_string(),
previous.verbatim.verbatim().to_string(),
editable_url.verbatim.verbatim().to_string(),
));
}
}
@ -41,27 +56,76 @@ impl Urls {
for requirement in manifest.requirements(markers, dependencies) {
match &requirement.source {
RequirementSource::Registry { .. } => {}
RequirementSource::Url { url, .. } | RequirementSource::Path { url, .. } => {
RequirementSource::Url {
subdirectory,
location,
url,
} => {
let url = VerbatimParsedUrl {
parsed_url: ParsedUrl::Archive(ParsedArchiveUrl {
url: location.clone(),
subdirectory: subdirectory.clone(),
}),
verbatim: url.clone(),
};
if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
if !is_equal(&previous, url) {
if !is_equal(&previous.verbatim, &url.verbatim) {
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim().to_string(),
url.verbatim().to_string(),
previous.verbatim.verbatim().to_string(),
url.verbatim.verbatim().to_string(),
));
}
}
}
RequirementSource::Git { url, .. } => {
RequirementSource::Path {
path,
editable,
url,
} => {
let url = VerbatimParsedUrl {
parsed_url: ParsedUrl::LocalFile(ParsedLocalFileUrl {
url: url.to_url(),
path: path.clone(),
editable: (*editable).unwrap_or_default(),
}),
verbatim: url.clone(),
};
if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
if !is_equal(&previous, url) {
if is_same_reference(&previous, url) {
debug!("Allowing {url} as a variant of {previous}");
if !is_equal(&previous.verbatim, &url.verbatim) {
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim.verbatim().to_string(),
url.verbatim.verbatim().to_string(),
));
}
}
}
RequirementSource::Git {
repository,
reference,
subdirectory,
url,
} => {
let url = VerbatimParsedUrl {
parsed_url: ParsedUrl::Git(ParsedGitUrl {
url: GitUrl::new(repository.clone(), reference.clone()),
subdirectory: subdirectory.clone(),
}),
verbatim: url.clone(),
};
if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) {
if !is_equal(&previous.verbatim, &url.verbatim) {
if is_same_reference(&previous.verbatim, &url.verbatim) {
debug!(
"Allowing {} as a variant of {}",
&url.verbatim, previous.verbatim
);
} else {
return Err(ResolveError::ConflictingUrlsDirect(
requirement.name.clone(),
previous.verbatim().to_string(),
url.verbatim().to_string(),
previous.verbatim.verbatim().to_string(),
url.verbatim.verbatim().to_string(),
));
}
}
@ -74,7 +138,7 @@ impl Urls {
}
/// Return the [`VerbatimUrl`] associated with the given package name, if any.
pub(crate) fn get(&self, package: &PackageName) -> Option<&VerbatimUrl> {
pub(crate) fn get(&self, package: &PackageName) -> Option<&VerbatimParsedUrl> {
self.0.get(package)
}