Remove redirects from the resolver (#2792)

## Summary

Rather than storing the `redirects` on the resolver, this PR just
re-uses the "convert this URL to precise" logic when we convert to a
`Resolution` after-the-fact. I think this is a lot simpler: it removes
state from the resolver, and simplifies a lot of the hooks around
distribution fetching (e.g., `get_or_build_wheel_metadata` no longer
returns `(Metadata23, Option<Url>)`).
This commit is contained in:
Charlie Marsh 2024-04-02 22:43:57 -04:00 committed by GitHub
parent ffd4b6fcac
commit 189d0d41d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 80 additions and 182 deletions

View file

@ -10,10 +10,8 @@ use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{
BuildableSource, BuiltDist, Dist, FileLocation, GitSourceDist, GitSourceUrl, IndexLocations,
LocalEditable, Name, SourceDist, SourceUrl,
BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name, SourceDist,
};
use pep508_rs::VerbatimUrl;
use platform_tags::Tags;
use pypi_types::Metadata23;
use uv_cache::{ArchiveTarget, ArchiveTimestamp, CacheBucket, CacheEntry, WheelCache};
@ -21,7 +19,6 @@ use uv_client::{CacheControl, CachedClientError, Connectivity, RegistryClient};
use uv_types::{BuildContext, NoBinary, NoBuild};
use crate::download::{BuiltWheel, UnzippedWheel};
use crate::git::resolve_precise;
use crate::locks::Locks;
use crate::{DiskWheel, Error, LocalWheel, Reporter, SourceDistributionBuilder};
@ -42,7 +39,6 @@ pub struct DistributionDatabase<'a, Context: BuildContext + Send + Sync> {
build_context: &'a Context,
builder: SourceDistributionBuilder<'a, Context>,
locks: Arc<Locks>,
reporter: Option<Arc<dyn Reporter>>,
}
impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> {
@ -52,7 +48,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
build_context,
builder: SourceDistributionBuilder::new(client, build_context),
locks: Arc::new(Locks::default()),
reporter: None,
}
}
@ -61,7 +56,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
let reporter = Arc::new(reporter);
Self {
reporter: Some(reporter.clone()),
builder: self.builder.with_reporter(reporter),
..self
}
@ -100,15 +94,9 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
/// possible. For example, given a Git dependency with a reference to a branch or tag, return a
/// URL with a precise reference to the current commit of that branch or tag.
#[instrument(skip_all, fields(%dist))]
pub async fn get_or_build_wheel_metadata(
&self,
dist: &Dist,
) -> Result<(Metadata23, Option<Url>), Error> {
pub async fn get_or_build_wheel_metadata(&self, dist: &Dist) -> Result<Metadata23, Error> {
match dist {
Dist::Built(built) => self
.get_wheel_metadata(built)
.await
.map(|metadata| (metadata, None)),
Dist::Built(built) => self.get_wheel_metadata(built).await,
Dist::Source(source) => {
self.build_wheel_metadata(&BuildableSource::Dist(source))
.await
@ -365,7 +353,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
pub async fn build_wheel_metadata(
&self,
source: &BuildableSource<'_>,
) -> Result<(Metadata23, Option<Url>), Error> {
) -> Result<Metadata23, Error> {
let no_build = match self.build_context.no_build() {
NoBuild::All => true,
NoBuild::None => false,
@ -382,54 +370,12 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let lock = self.locks.acquire(source).await;
let _guard = lock.lock().await;
// Insert the `precise` URL, if it exists.
if let BuildableSource::Dist(SourceDist::Git(source)) = source {
if let Some(precise) = resolve_precise(
&source.url,
self.build_context.cache(),
self.reporter.as_ref(),
)
.await?
{
let source = SourceDist::Git(GitSourceDist {
url: VerbatimUrl::unknown(precise.clone()),
..source.clone()
});
let source = BuildableSource::Dist(&source);
let metadata = self
.builder
.download_and_build_metadata(&source)
.boxed()
.await?;
return Ok((metadata, Some(precise)));
}
}
if let BuildableSource::Url(SourceUrl::Git(source)) = source {
if let Some(precise) = resolve_precise(
source.url,
self.build_context.cache(),
self.reporter.as_ref(),
)
.await?
{
let source = SourceUrl::Git(GitSourceUrl { url: &precise });
let source = BuildableSource::Url(source);
let metadata = self
.builder
.download_and_build_metadata(&source)
.boxed()
.await?;
return Ok((metadata, Some(precise)));
}
}
let metadata = self
.builder
.download_and_build_metadata(source)
.boxed()
.await?;
Ok((metadata, None))
Ok(metadata)
}
/// Stream a wheel from a URL, unzipping it into the cache as it's downloaded.

View file

@ -45,6 +45,8 @@ impl RepositoryReference {
}
/// Download a source distribution from a Git repository.
///
/// Assumes that the URL is a precise Git URL, with a full commit hash.
pub(crate) async fn fetch_git_archive(
url: &Url,
cache: &Cache,
@ -67,17 +69,6 @@ pub(crate) async fn fetch_git_archive(
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;
// Extract the resolved URL from the in-memory cache, to save a look-up in the fetch.
let url = {
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
if let Some(resolved) = resolved_git_refs.get(&reference) {
url.with_precise(*resolved)
} else {
url
}
};
// Fetch the Git repository.
let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
@ -88,15 +79,6 @@ pub(crate) async fn fetch_git_archive(
.await?
.map_err(Error::Git)?;
// Insert the resolved URL into the in-memory cache.
if url.precise().is_none() {
if let Some(precise) = fetch.git().precise() {
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
resolved_git_refs.insert(reference, precise);
}
}
Ok((fetch, subdirectory))
}
@ -160,6 +142,28 @@ pub(crate) async fn resolve_precise(
})))
}
/// Given a remote source distribution, return a precise variant, if possible.
///
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
/// with a precise reference to the current commit of that branch or tag.
///
/// This method takes into account various normalizations that are independent from the Git
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
/// prefix kinds.
///
/// This method will only return precise URLs for URLs that have already been resolved via
/// [`resolve_precise`].
pub fn to_precise(url: &Url) -> Option<Url> {
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).ok()?;
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
let precise = resolved_git_refs.get(&reference)?;
Some(Url::from(DirectGitUrl {
url: url.with_precise(*precise),
subdirectory,
}))
}
/// Returns `true` if the URLs refer to the same Git commit.
///
/// For example, the previous URL could be a branch or tag, while the current URL would be a

View file

@ -1,7 +1,7 @@
pub use distribution_database::DistributionDatabase;
pub use download::{BuiltWheel, DiskWheel, LocalWheel};
pub use error::Error;
pub use git::is_same_reference;
pub use git::{is_same_reference, to_precise};
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
pub use reporter::Reporter;
pub use source::SourceDistributionBuilder;

View file

@ -34,7 +34,7 @@ use uv_fs::write_atomic;
use uv_types::{BuildContext, BuildKind, NoBuild, SourceBuildTrait};
use crate::error::Error;
use crate::git::fetch_git_archive;
use crate::git::{fetch_git_archive, resolve_precise};
use crate::source::built_wheel_metadata::BuiltWheelMetadata;
use crate::source::manifest::Manifest;
use crate::Reporter;
@ -713,17 +713,27 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
resource: &GitSourceUrl<'_>,
tags: &Tags,
) -> Result<BuiltWheelMetadata, Error> {
let (fetch, subdirectory) = fetch_git_archive(
// Resolve to a precise Git SHA.
let url = if let Some(url) = resolve_precise(
resource.url,
self.build_context.cache(),
self.reporter.as_ref(),
)
.await?;
.await?
{
Cow::Owned(url)
} else {
Cow::Borrowed(resource.url)
};
// Fetch the Git repository.
let (fetch, subdirectory) =
fetch_git_archive(&url, self.build_context.cache(), self.reporter.as_ref()).await?;
let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
WheelCache::Git(&url, &git_sha.to_short_string()).root(),
);
// If the cache contains a compatible wheel, return it.
@ -768,17 +778,27 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
source: &BuildableSource<'_>,
resource: &GitSourceUrl<'_>,
) -> Result<Metadata23, Error> {
let (fetch, subdirectory) = fetch_git_archive(
// Resolve to a precise Git SHA.
let url = if let Some(url) = resolve_precise(
resource.url,
self.build_context.cache(),
self.reporter.as_ref(),
)
.await?;
.await?
{
Cow::Owned(url)
} else {
Cow::Borrowed(resource.url)
};
// Fetch the Git repository.
let (fetch, subdirectory) =
fetch_git_archive(&url, self.build_context.cache(), self.reporter.as_ref()).await?;
let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
WheelCache::Git(&url, &git_sha.to_short_string()).root(),
);
// If the cache contains compatible metadata, return it.

View file

@ -1,7 +1,7 @@
use std::collections::VecDeque;
use anyhow::{Context, Result};
use cache_key::CanonicalUrl;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use rustc_hash::FxHashSet;
@ -139,7 +139,7 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
metadata.requires_dist.clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let (metadata, precise) = self
let metadata = self
.database
.get_or_build_wheel_metadata(&dist)
.await
@ -153,11 +153,6 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
// Insert the metadata into the index.
self.index.insert_metadata(id, metadata);
// Insert the redirect into the index.
if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(url), precise);
}
requires_dist
}
};

View file

@ -6,7 +6,6 @@ use anyhow::{Context, Result};
use futures::{StreamExt, TryStreamExt};
use url::Url;
use cache_key::CanonicalUrl;
use distribution_types::{BuildableSource, PackageId, PathSourceUrl, SourceUrl};
use pep508_rs::Requirement;
use uv_client::RegistryClient;
@ -94,16 +93,11 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> {
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = self.database.build_wheel_metadata(&source).await?;
let metadata = self.database.build_wheel_metadata(&source).await?;
// Insert the metadata into the index.
self.index.insert_metadata(id, metadata.clone());
// Insert the redirect into the index.
if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(&url), precise);
}
metadata
}
};

View file

@ -8,7 +8,6 @@ use futures::{StreamExt, TryStreamExt};
use serde::Deserialize;
use tracing::debug;
use cache_key::CanonicalUrl;
use distribution_filename::{SourceDistFilename, WheelFilename};
use distribution_types::{
BuildableSource, DirectSourceUrl, GitSourceUrl, PackageId, PathSourceUrl, RemoteSource,
@ -243,18 +242,13 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = database.build_wheel_metadata(&source).await?;
let metadata = database.build_wheel_metadata(&source).await?;
let name = metadata.name.clone();
// Insert the metadata into the index.
index.insert_metadata(id, metadata);
// Insert the redirect into the index.
if let Some(precise) = precise {
index.insert_redirect(CanonicalUrl::new(&requirement.url), precise);
}
name
}
};

View file

@ -4,8 +4,8 @@ use pep508_rs::VerbatimUrl;
/// Given a [`VerbatimUrl`] and a redirect, apply the redirect to the URL while preserving as much
/// of the verbatim representation as possible.
pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: &Url) -> VerbatimUrl {
let redirect = VerbatimUrl::from_url(redirect.clone());
pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
let redirect = VerbatimUrl::from_url(redirect);
// The redirect should be the "same" URL, but with a specific commit hash added after the `@`.
// We take advantage of this to preserve as much of the verbatim representation as possible.
@ -53,7 +53,7 @@ mod tests {
"https://github.com/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe",
)?
.with_given("https://github.com/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe");
assert_eq!(apply_redirect(&verbatim, &redirect), expected);
assert_eq!(apply_redirect(&verbatim, redirect), expected);
// If there's an `@` in the original representation, and it's stable between the parsed and
// given representations, we preserve everything that precedes the `@` in the precise
@ -67,7 +67,7 @@ mod tests {
"https://github.com/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe",
)?
.with_given("https://${DOMAIN}.com/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe");
assert_eq!(apply_redirect(&verbatim, &redirect), expected);
assert_eq!(apply_redirect(&verbatim, redirect), expected);
// If there's a conflict after the `@`, discard the original representation.
let verbatim = VerbatimUrl::parse_url("https://github.com/flask.git@main")?
@ -78,7 +78,7 @@ mod tests {
let expected = VerbatimUrl::parse_url(
"https://github.com/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe",
)?;
assert_eq!(apply_redirect(&verbatim, &redirect), expected);
assert_eq!(apply_redirect(&verbatim, redirect), expected);
Ok(())
}

View file

@ -2,8 +2,6 @@ use std::borrow::Cow;
use std::hash::BuildHasherDefault;
use anyhow::Result;
use cache_key::CanonicalUrl;
use dashmap::DashMap;
use itertools::Itertools;
use owo_colors::OwoColorize;
use petgraph::visit::EdgeRef;
@ -12,9 +10,7 @@ use pubgrub::range::Range;
use pubgrub::solver::{Kind, State};
use pubgrub::type_aliases::SelectedDependencies;
use rustc_hash::{FxHashMap, FxHashSet};
use url::Url;
use crate::dependency_provider::UvDependencyProvider;
use distribution_types::{
Dist, DistributionMetadata, LocalEditable, Name, PackageId, ResolvedDist, Verbatim,
VersionOrUrl,
@ -23,8 +19,10 @@ use once_map::OnceMap;
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use pypi_types::{Hashes, Metadata23};
use uv_distribution::to_precise;
use uv_normalize::{ExtraName, PackageName};
use crate::dependency_provider::UvDependencyProvider;
use crate::editables::Editables;
use crate::pins::FilePins;
use crate::preferences::Preferences;
@ -69,7 +67,6 @@ impl ResolutionGraph {
pins: &FilePins,
packages: &OnceMap<PackageName, VersionsResponse>,
distributions: &OnceMap<PackageId, Metadata23>,
redirects: &DashMap<CanonicalUrl, Url>,
state: &State<UvDependencyProvider>,
preferences: &Preferences,
editables: Editables,
@ -120,10 +117,8 @@ impl ResolutionGraph {
let pinned_package = if let Some((editable, _)) = editables.get(package_name) {
Dist::from_editable(package_name.clone(), editable.clone())?
} else {
let url = redirects.get(&CanonicalUrl::new(url)).map_or_else(
|| url.clone(),
|precise| apply_redirect(url, precise.value()),
);
let url = to_precise(url)
.map_or_else(|| url.clone(), |precise| apply_redirect(url, precise));
Dist::from_url(package_name.clone(), url)?
};
@ -225,9 +220,9 @@ impl ResolutionGraph {
.or_insert_with(Vec::new)
.push(extra.clone());
} else {
let url = redirects.get(&CanonicalUrl::new(url)).map_or_else(
let url = to_precise(url).map_or_else(
|| url.clone(),
|precise| apply_redirect(url, precise.value()),
|precise| apply_redirect(url, precise),
);
let pinned_package = Dist::from_url(package_name.clone(), url)?;

View file

@ -1,14 +1,11 @@
use cache_key::CanonicalUrl;
use dashmap::DashMap;
use std::sync::Arc;
use url::Url;
use distribution_types::PackageId;
use once_map::OnceMap;
use pypi_types::Metadata23;
use uv_normalize::PackageName;
use super::provider::VersionsResponse;
use crate::resolver::provider::VersionsResponse;
/// In-memory index of package metadata.
#[derive(Default)]
@ -19,11 +16,6 @@ pub struct InMemoryIndex {
/// A map from package ID to metadata for that distribution.
pub(crate) distributions: OnceMap<PackageId, Metadata23>,
/// A map from source URL to precise URL. For example, the source URL
/// `git+https://github.com/pallets/flask.git` could be redirected to
/// `git+https://github.com/pallets/flask.git@c2f65dd1cfff0672b902fd5b30815f0b4137214c`.
pub(crate) redirects: DashMap<CanonicalUrl, Url>,
}
impl InMemoryIndex {
@ -37,11 +29,6 @@ impl InMemoryIndex {
self.distributions.done(package_id, metadata);
}
/// Insert a redirect from a source URL to a target URL.
pub fn insert_redirect(&self, source: CanonicalUrl, target: Url) {
self.redirects.insert(source, target);
}
/// Get the [`VersionsResponse`] for a given package name, without waiting.
pub fn get_package(&self, package_name: &PackageName) -> Option<Arc<VersionsResponse>> {
self.packages.get(package_name)

View file

@ -6,7 +6,7 @@ use std::ops::Deref;
use std::sync::Arc;
use anyhow::Result;
use cache_key::CanonicalUrl;
use dashmap::{DashMap, DashSet};
use futures::{FutureExt, StreamExt};
use itertools::Itertools;
@ -16,7 +16,6 @@ use pubgrub::solver::{Incompatibility, State};
use rustc_hash::{FxHashMap, FxHashSet};
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, info_span, instrument, trace, Instrument};
use url::Url;
use distribution_types::{
BuiltDist, Dist, DistributionMetadata, IncompatibleDist, IncompatibleSource, IncompatibleWheel,
@ -300,7 +299,6 @@ impl<
&pins,
&self.index.packages,
&self.index.distributions,
&self.index.redirects,
&state,
&self.preferences,
self.editables.clone(),
@ -933,7 +931,6 @@ impl<
Some(Response::Dist {
dist: Dist::Built(dist),
metadata,
precise: _,
}) => {
trace!("Received built distribution metadata for: {dist}");
self.index.distributions.done(dist.package_id(), metadata);
@ -941,32 +938,11 @@ impl<
Some(Response::Dist {
dist: Dist::Source(distribution),
metadata,
precise,
}) => {
trace!("Received source distribution metadata for: {distribution}");
self.index
.distributions
.done(distribution.package_id(), metadata);
if let Some(precise) = precise {
match distribution {
SourceDist::DirectUrl(sdist) => {
self.index
.redirects
.insert(CanonicalUrl::new(&sdist.url), precise);
}
SourceDist::Git(sdist) => {
self.index
.redirects
.insert(CanonicalUrl::new(&sdist.url), precise);
}
SourceDist::Path(sdist) => {
self.index
.redirects
.insert(CanonicalUrl::new(&sdist.url), precise);
}
SourceDist::Registry(_) => {}
}
}
}
None => {}
}
@ -992,7 +968,7 @@ impl<
// Fetch distribution metadata from the distribution database.
Request::Dist(dist) => {
let (metadata, precise) = self
let metadata = self
.provider
.get_or_build_wheel_metadata(&dist)
.boxed()
@ -1009,11 +985,7 @@ impl<
ResolveError::FetchAndBuild(Box::new(source_dist), err)
}
})?;
Ok(Some(Response::Dist {
dist,
metadata,
precise,
}))
Ok(Some(Response::Dist { dist, metadata }))
}
Request::Installed(dist) => {
@ -1080,7 +1052,7 @@ impl<
let response = match dist {
ResolvedDist::Installable(dist) => {
let (metadata, precise) = self
let metadata = self
.provider
.get_or_build_wheel_metadata(&dist)
.boxed()
@ -1099,11 +1071,7 @@ impl<
ResolveError::FetchAndBuild(Box::new(source_dist), err)
}
})?;
Response::Dist {
dist,
metadata,
precise,
}
Response::Dist { dist, metadata }
}
ResolvedDist::Installed(dist) => {
let metadata = dist.metadata().map_err(|err| {
@ -1182,11 +1150,7 @@ enum Response {
/// The returned metadata for a package hosted on a registry.
Package(PackageName, VersionsResponse),
/// The returned metadata for a distribution.
Dist {
dist: Dist,
metadata: Metadata23,
precise: Option<Url>,
},
Dist { dist: Dist, metadata: Metadata23 },
/// The returned metadata for an already-installed distribution.
Installed {
dist: InstalledDist,

View file

@ -2,7 +2,6 @@ use std::future::Future;
use anyhow::Result;
use chrono::{DateTime, Utc};
use url::Url;
use distribution_types::{Dist, IndexLocations};
use platform_tags::Tags;
@ -17,7 +16,7 @@ use crate::version_map::VersionMap;
use crate::yanks::AllowedYanks;
pub type PackageVersionsResult = Result<VersionsResponse, uv_client::Error>;
pub type WheelMetadataResult = Result<(Metadata23, Option<Url>), uv_distribution::Error>;
pub type WheelMetadataResult = Result<Metadata23, uv_distribution::Error>;
/// The response when requesting versions for a package
#[derive(Debug)]