Use distribution database and index for all pre-resolution phases (#2766)

## Summary

Ensures that if we resolve any distributions before the resolver, we
cache the metadata in-memory.

_Also_ ensures that we lock (important!) when resolving Git
distributions.
This commit is contained in:
Charlie Marsh 2024-04-01 20:34:13 -04:00 committed by GitHub
parent dfdcce68fd
commit ccd457a37e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 189 additions and 101 deletions

View file

@ -54,6 +54,17 @@ pub enum SourceUrl<'a> {
Path(PathSourceUrl<'a>),
}
impl<'a> SourceUrl<'a> {
/// Return the [`Url`] of the source.
pub fn url(&self) -> &Url {
match self {
Self::Direct(dist) => dist.url,
Self::Git(dist) => dist.url,
Self::Path(dist) => dist.url,
}
}
}
impl std::fmt::Display for SourceUrl<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {

View file

@ -134,8 +134,9 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
// Fetch the metadata for the distribution.
let requires_dist = {
let id = dist.package_id();
if let Some(metadata) = self.index.get_metadata(&id) {
// If the metadata is already in the index, return it.
if let Some(metadata) = self.index.get_metadata(&dist.package_id()) {
metadata.requires_dist.clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
@ -148,16 +149,17 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
Dist::Source(source) => format!("Failed to download and build: {source}"),
})?;
let requires_dist = metadata.requires_dist.clone();
// Insert the metadata into the index.
self.index
.insert_metadata(dist.package_id(), metadata.clone());
self.index.insert_metadata(id, metadata);
// Insert the redirect into the index.
if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(url), precise);
}
metadata.requires_dist
requires_dist
}
};

View file

@ -1,15 +1,17 @@
use std::borrow::Cow;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::{Context, Result};
use futures::{StreamExt, TryStreamExt};
use url::Url;
use distribution_types::{BuildableSource, PathSourceUrl, SourceUrl};
use cache_key::CanonicalUrl;
use distribution_types::{BuildableSource, PackageId, PathSourceUrl, SourceUrl};
use pep508_rs::Requirement;
use uv_client::RegistryClient;
use uv_distribution::{Reporter, SourceDistributionBuilder};
use uv_distribution::{DistributionDatabase, Reporter};
use uv_resolver::InMemoryIndex;
use uv_types::BuildContext;
use crate::ExtrasSpecification;
@ -18,45 +20,47 @@ use crate::ExtrasSpecification;
///
/// Used, e.g., to determine the the input requirements when a user specifies a `pyproject.toml`
/// file, which may require running PEP 517 build hooks to extract metadata.
pub struct SourceTreeResolver<'a> {
pub struct SourceTreeResolver<'a, Context: BuildContext + Send + Sync> {
/// The requirements for the project.
source_trees: Vec<PathBuf>,
/// The extras to include when resolving requirements.
extras: &'a ExtrasSpecification<'a>,
/// The reporter to use when building source distributions.
reporter: Option<Arc<dyn Reporter>>,
/// The in-memory index for resolving dependencies.
index: &'a InMemoryIndex,
/// The database for fetching and building distributions.
database: DistributionDatabase<'a, Context>,
}
impl<'a> SourceTreeResolver<'a> {
impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> {
/// Instantiate a new [`SourceTreeResolver`] for a given set of `source_trees`.
pub fn new(source_trees: Vec<PathBuf>, extras: &'a ExtrasSpecification<'a>) -> Self {
pub fn new(
source_trees: Vec<PathBuf>,
extras: &'a ExtrasSpecification<'a>,
context: &'a Context,
client: &'a RegistryClient,
index: &'a InMemoryIndex,
) -> Self {
Self {
source_trees,
extras,
reporter: None,
index,
database: DistributionDatabase::new(client, context),
}
}
/// Set the [`Reporter`] to use for this resolver.
#[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
let reporter: Arc<dyn Reporter> = Arc::new(reporter);
Self {
reporter: Some(reporter),
database: self.database.with_reporter(reporter),
..self
}
}
/// Resolve the requirements from the provided source trees.
pub async fn resolve<T: BuildContext>(
self,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
pub async fn resolve(self) -> Result<Vec<Requirement>> {
let requirements: Vec<_> = futures::stream::iter(self.source_trees.iter())
.map(|source_tree| async {
self.resolve_source_tree(source_tree, context, client).await
})
.map(|source_tree| async { self.resolve_source_tree(source_tree).await })
.buffered(50)
.try_collect()
.await?;
@ -64,12 +68,7 @@ impl<'a> SourceTreeResolver<'a> {
}
/// Infer the package name for a given "unnamed" requirement.
async fn resolve_source_tree<T: BuildContext>(
&self,
source_tree: &Path,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
async fn resolve_source_tree(&self, source_tree: &Path) -> Result<Vec<Requirement>> {
// Convert to a buildable source.
let path = fs_err::canonicalize(source_tree).with_context(|| {
format!(
@ -80,19 +79,33 @@ impl<'a> SourceTreeResolver<'a> {
let Ok(url) = Url::from_directory_path(&path) else {
return Err(anyhow::anyhow!("Failed to convert path to URL"));
};
let source = BuildableSource::Url(SourceUrl::Path(PathSourceUrl {
let source = SourceUrl::Path(PathSourceUrl {
url: &url,
path: Cow::Owned(path),
}));
});
// Run the PEP 517 build process to extract metadata from the source distribution.
let builder = if let Some(reporter) = self.reporter.clone() {
SourceDistributionBuilder::new(client, context).with_reporter(reporter)
// Fetch the metadata for the distribution.
let metadata = {
let id = PackageId::from_url(source.url());
if let Some(metadata) = self.index.get_metadata(&id) {
// If the metadata is already in the index, return it.
metadata.deref().clone()
} else {
SourceDistributionBuilder::new(client, context)
};
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = self.database.build_wheel_metadata(&source).await?;
let metadata = builder.download_and_build_metadata(&source).await?;
// Insert the metadata into the index.
self.index.insert_metadata(id, metadata.clone());
// Insert the redirect into the index.
if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(&url), precise);
}
metadata
}
};
// Determine the appropriate requirements to return based on the extras. This involves
// evaluating the `extras` expression in any markers, but preserving the remaining marker

View file

@ -1,72 +1,76 @@
use std::borrow::Cow;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use anyhow::{Context, Result};
use anyhow::Result;
use configparser::ini::Ini;
use futures::{StreamExt, TryStreamExt};
use serde::Deserialize;
use tracing::debug;
use cache_key::CanonicalUrl;
use distribution_filename::{SourceDistFilename, WheelFilename};
use distribution_types::{
BuildableSource, DirectSourceUrl, GitSourceUrl, PathSourceUrl, RemoteSource, SourceUrl,
BuildableSource, DirectSourceUrl, GitSourceUrl, PackageId, PathSourceUrl, RemoteSource,
SourceUrl,
};
use pep508_rs::{
Requirement, RequirementsTxtRequirement, Scheme, UnnamedRequirement, VersionOrUrl,
};
use pypi_types::Metadata10;
use uv_client::RegistryClient;
use uv_distribution::{Reporter, SourceDistributionBuilder};
use uv_distribution::{DistributionDatabase, Reporter};
use uv_normalize::PackageName;
use uv_resolver::InMemoryIndex;
use uv_types::BuildContext;
/// Like [`RequirementsSpecification`], but with concrete names for all requirements.
pub struct NamedRequirementsResolver {
pub struct NamedRequirementsResolver<'a, Context: BuildContext + Send + Sync> {
/// The requirements for the project.
requirements: Vec<RequirementsTxtRequirement>,
/// The reporter to use when building source distributions.
reporter: Option<Arc<dyn Reporter>>,
/// The in-memory index for resolving dependencies.
index: &'a InMemoryIndex,
/// The database for fetching and building distributions.
database: DistributionDatabase<'a, Context>,
}
impl NamedRequirementsResolver {
impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Context> {
/// Instantiate a new [`NamedRequirementsResolver`] for a given set of requirements.
pub fn new(requirements: Vec<RequirementsTxtRequirement>) -> Self {
pub fn new(
requirements: Vec<RequirementsTxtRequirement>,
context: &'a Context,
client: &'a RegistryClient,
index: &'a InMemoryIndex,
) -> Self {
Self {
requirements,
reporter: None,
index,
database: DistributionDatabase::new(client, context),
}
}
/// Set the [`Reporter`] to use for this resolver.
#[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
let reporter: Arc<dyn Reporter> = Arc::new(reporter);
Self {
reporter: Some(reporter),
database: self.database.with_reporter(reporter),
..self
}
}
/// Resolve any unnamed requirements in the specification.
pub async fn resolve<T: BuildContext>(
self,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
futures::stream::iter(self.requirements)
pub async fn resolve(self) -> Result<Vec<Requirement>> {
let Self {
requirements,
index,
database,
} = self;
futures::stream::iter(requirements)
.map(|requirement| async {
match requirement {
RequirementsTxtRequirement::Pep508(requirement) => Ok(requirement),
RequirementsTxtRequirement::Unnamed(requirement) => {
Self::resolve_requirement(
requirement,
context,
client,
self.reporter.clone(),
)
.await
Self::resolve_requirement(requirement, index, &database).await
}
}
})
@ -76,11 +80,10 @@ impl NamedRequirementsResolver {
}
/// Infer the package name for a given "unnamed" requirement.
async fn resolve_requirement<T: BuildContext>(
async fn resolve_requirement(
requirement: UnnamedRequirement,
context: &T,
client: &RegistryClient,
reporter: Option<Arc<dyn Reporter>>,
index: &InMemoryIndex,
database: &DistributionDatabase<'a, Context>,
) -> Result<Requirement> {
// If the requirement is a wheel, extract the package name from the wheel filename.
//
@ -231,20 +234,33 @@ impl NamedRequirementsResolver {
}
};
// Run the PEP 517 build process to extract metadata from the source distribution.
let builder = if let Some(reporter) = reporter {
SourceDistributionBuilder::new(client, context).with_reporter(reporter)
// Fetch the metadata for the distribution.
let name = {
let id = PackageId::from_url(source.url());
if let Some(metadata) = index.get_metadata(&id) {
// If the metadata is already in the index, return it.
metadata.name.clone()
} else {
SourceDistributionBuilder::new(client, context)
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = database.build_wheel_metadata(&source).await?;
let name = metadata.name.clone();
// Insert the metadata into the index.
index.insert_metadata(id, metadata);
// Insert the redirect into the index.
if let Some(precise) = precise {
index.insert_redirect(CanonicalUrl::new(&requirement.url), precise);
}
name
}
};
let metadata = builder
.download_and_build_metadata(&BuildableSource::Url(source))
.await
.context("Failed to build source distribution")?;
Ok(Requirement {
name: metadata.name,
name,
extras: requirement.extras,
version_or_url: Some(VersionOrUrl::Url(requirement.url)),
marker: requirement.marker,

View file

@ -263,17 +263,28 @@ pub(crate) async fn pip_compile(
// Resolve the requirements from the provided sources.
let requirements = {
// Convert from unnamed to named requirements.
let mut requirements = NamedRequirementsResolver::new(requirements)
let mut requirements = NamedRequirementsResolver::new(
requirements,
&build_dispatch,
&client,
&top_level_index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve(&build_dispatch, &client)
.resolve()
.await?;
// Resolve any source trees into requirements.
if !source_trees.is_empty() {
requirements.extend(
SourceTreeResolver::new(source_trees, &extras)
SourceTreeResolver::new(
source_trees,
&extras,
&build_dispatch,
&client,
&top_level_index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve(&build_dispatch, &client)
.resolve()
.await?,
);
}

View file

@ -244,17 +244,18 @@ pub(crate) async fn pip_install(
// Resolve the requirements from the provided sources.
let requirements = {
// Convert from unnamed to named requirements.
let mut requirements = NamedRequirementsResolver::new(requirements)
let mut requirements =
NamedRequirementsResolver::new(requirements, &resolve_dispatch, &client, &index)
.with_reporter(ResolverReporter::from(printer))
.resolve(&resolve_dispatch, &client)
.resolve()
.await?;
// Resolve any source trees into requirements.
if !source_trees.is_empty() {
requirements.extend(
SourceTreeResolver::new(source_trees, extras)
SourceTreeResolver::new(source_trees, extras, &resolve_dispatch, &client, &index)
.with_reporter(ResolverReporter::from(printer))
.resolve(&resolve_dispatch, &client)
.resolve()
.await?,
);
}

View file

@ -194,17 +194,24 @@ pub(crate) async fn pip_sync(
// Convert from unnamed to named requirements.
let requirements = {
// Convert from unnamed to named requirements.
let mut requirements = NamedRequirementsResolver::new(requirements)
let mut requirements =
NamedRequirementsResolver::new(requirements, &build_dispatch, &client, &index)
.with_reporter(ResolverReporter::from(printer))
.resolve(&build_dispatch, &client)
.resolve()
.await?;
// Resolve any source trees into requirements.
if !source_trees.is_empty() {
requirements.extend(
SourceTreeResolver::new(source_trees, &ExtrasSpecification::None)
SourceTreeResolver::new(
source_trees,
&ExtrasSpecification::None,
&build_dispatch,
&client,
&index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve(&build_dispatch, &client)
.resolve()
.await?,
);
}

View file

@ -1354,6 +1354,33 @@ fn compile_git_concurrent_access() -> Result<()> {
Ok(())
}
/// Resolve two packages from a `requirements.in` file with the same Git HTTPS dependency.
#[test]
#[cfg(feature = "git")]
fn compile_git_unnamed_concurrent_access() -> Result<()> {
let context = TestContext::new("3.12");
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in
.write_str("git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a\ngit+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b")?;
uv_snapshot!(context.compile()
.arg("requirements.in"), @r###"
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by uv via the following command:
# uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2024-03-25T00:00:00Z requirements.in
example-pkg-a @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a
example-pkg-b @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b
----- stderr -----
Resolved 2 packages in [TIME]
"###
);
Ok(())
}
/// Resolve a Git dependency with a declared name that differs from the true name of the package.
#[test]
#[cfg(feature = "git")]