Use distribution database and index for all pre-resolution phases (#2766)

## Summary

Ensures that if we resolve any distributions before the resolver, we
cache the metadata in-memory.

_Also_ ensures that we lock (important!) when resolving Git
distributions.
This commit is contained in:
Charlie Marsh 2024-04-01 20:34:13 -04:00 committed by GitHub
parent dfdcce68fd
commit ccd457a37e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 189 additions and 101 deletions

View file

@ -54,6 +54,17 @@ pub enum SourceUrl<'a> {
Path(PathSourceUrl<'a>), Path(PathSourceUrl<'a>),
} }
impl<'a> SourceUrl<'a> {
/// Return the [`Url`] of the source.
pub fn url(&self) -> &Url {
match self {
Self::Direct(dist) => dist.url,
Self::Git(dist) => dist.url,
Self::Path(dist) => dist.url,
}
}
}
impl std::fmt::Display for SourceUrl<'_> { impl std::fmt::Display for SourceUrl<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {

View file

@ -134,8 +134,9 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
// Fetch the metadata for the distribution. // Fetch the metadata for the distribution.
let requires_dist = { let requires_dist = {
// If the metadata is already in the index, return it. let id = dist.package_id();
if let Some(metadata) = self.index.get_metadata(&dist.package_id()) { if let Some(metadata) = self.index.get_metadata(&id) {
// If the metadata is already in the index, return it.
metadata.requires_dist.clone() metadata.requires_dist.clone()
} else { } else {
// Run the PEP 517 build process to extract metadata from the source distribution. // Run the PEP 517 build process to extract metadata from the source distribution.
@ -148,16 +149,17 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> {
Dist::Source(source) => format!("Failed to download and build: {source}"), Dist::Source(source) => format!("Failed to download and build: {source}"),
})?; })?;
let requires_dist = metadata.requires_dist.clone();
// Insert the metadata into the index. // Insert the metadata into the index.
self.index self.index.insert_metadata(id, metadata);
.insert_metadata(dist.package_id(), metadata.clone());
// Insert the redirect into the index. // Insert the redirect into the index.
if let Some(precise) = precise { if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(url), precise); self.index.insert_redirect(CanonicalUrl::new(url), precise);
} }
metadata.requires_dist requires_dist
} }
}; };

View file

@ -1,15 +1,17 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::ops::Deref;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use futures::{StreamExt, TryStreamExt}; use futures::{StreamExt, TryStreamExt};
use url::Url; use url::Url;
use distribution_types::{BuildableSource, PathSourceUrl, SourceUrl}; use cache_key::CanonicalUrl;
use distribution_types::{BuildableSource, PackageId, PathSourceUrl, SourceUrl};
use pep508_rs::Requirement; use pep508_rs::Requirement;
use uv_client::RegistryClient; use uv_client::RegistryClient;
use uv_distribution::{Reporter, SourceDistributionBuilder}; use uv_distribution::{DistributionDatabase, Reporter};
use uv_resolver::InMemoryIndex;
use uv_types::BuildContext; use uv_types::BuildContext;
use crate::ExtrasSpecification; use crate::ExtrasSpecification;
@ -18,45 +20,47 @@ use crate::ExtrasSpecification;
/// ///
/// Used, e.g., to determine the the input requirements when a user specifies a `pyproject.toml` /// Used, e.g., to determine the the input requirements when a user specifies a `pyproject.toml`
/// file, which may require running PEP 517 build hooks to extract metadata. /// file, which may require running PEP 517 build hooks to extract metadata.
pub struct SourceTreeResolver<'a> { pub struct SourceTreeResolver<'a, Context: BuildContext + Send + Sync> {
/// The requirements for the project. /// The requirements for the project.
source_trees: Vec<PathBuf>, source_trees: Vec<PathBuf>,
/// The extras to include when resolving requirements. /// The extras to include when resolving requirements.
extras: &'a ExtrasSpecification<'a>, extras: &'a ExtrasSpecification<'a>,
/// The reporter to use when building source distributions. /// The in-memory index for resolving dependencies.
reporter: Option<Arc<dyn Reporter>>, index: &'a InMemoryIndex,
/// The database for fetching and building distributions.
database: DistributionDatabase<'a, Context>,
} }
impl<'a> SourceTreeResolver<'a> { impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> {
/// Instantiate a new [`SourceTreeResolver`] for a given set of `source_trees`. /// Instantiate a new [`SourceTreeResolver`] for a given set of `source_trees`.
pub fn new(source_trees: Vec<PathBuf>, extras: &'a ExtrasSpecification<'a>) -> Self { pub fn new(
source_trees: Vec<PathBuf>,
extras: &'a ExtrasSpecification<'a>,
context: &'a Context,
client: &'a RegistryClient,
index: &'a InMemoryIndex,
) -> Self {
Self { Self {
source_trees, source_trees,
extras, extras,
reporter: None, index,
database: DistributionDatabase::new(client, context),
} }
} }
/// Set the [`Reporter`] to use for this resolver. /// Set the [`Reporter`] to use for this resolver.
#[must_use] #[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self { pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
let reporter: Arc<dyn Reporter> = Arc::new(reporter);
Self { Self {
reporter: Some(reporter), database: self.database.with_reporter(reporter),
..self ..self
} }
} }
/// Resolve the requirements from the provided source trees. /// Resolve the requirements from the provided source trees.
pub async fn resolve<T: BuildContext>( pub async fn resolve(self) -> Result<Vec<Requirement>> {
self,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
let requirements: Vec<_> = futures::stream::iter(self.source_trees.iter()) let requirements: Vec<_> = futures::stream::iter(self.source_trees.iter())
.map(|source_tree| async { .map(|source_tree| async { self.resolve_source_tree(source_tree).await })
self.resolve_source_tree(source_tree, context, client).await
})
.buffered(50) .buffered(50)
.try_collect() .try_collect()
.await?; .await?;
@ -64,12 +68,7 @@ impl<'a> SourceTreeResolver<'a> {
} }
/// Infer the package name for a given "unnamed" requirement. /// Infer the package name for a given "unnamed" requirement.
async fn resolve_source_tree<T: BuildContext>( async fn resolve_source_tree(&self, source_tree: &Path) -> Result<Vec<Requirement>> {
&self,
source_tree: &Path,
context: &T,
client: &RegistryClient,
) -> Result<Vec<Requirement>> {
// Convert to a buildable source. // Convert to a buildable source.
let path = fs_err::canonicalize(source_tree).with_context(|| { let path = fs_err::canonicalize(source_tree).with_context(|| {
format!( format!(
@ -80,20 +79,34 @@ impl<'a> SourceTreeResolver<'a> {
let Ok(url) = Url::from_directory_path(&path) else { let Ok(url) = Url::from_directory_path(&path) else {
return Err(anyhow::anyhow!("Failed to convert path to URL")); return Err(anyhow::anyhow!("Failed to convert path to URL"));
}; };
let source = BuildableSource::Url(SourceUrl::Path(PathSourceUrl { let source = SourceUrl::Path(PathSourceUrl {
url: &url, url: &url,
path: Cow::Owned(path), path: Cow::Owned(path),
})); });
// Run the PEP 517 build process to extract metadata from the source distribution. // Fetch the metadata for the distribution.
let builder = if let Some(reporter) = self.reporter.clone() { let metadata = {
SourceDistributionBuilder::new(client, context).with_reporter(reporter) let id = PackageId::from_url(source.url());
} else { if let Some(metadata) = self.index.get_metadata(&id) {
SourceDistributionBuilder::new(client, context) // If the metadata is already in the index, return it.
metadata.deref().clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = self.database.build_wheel_metadata(&source).await?;
// Insert the metadata into the index.
self.index.insert_metadata(id, metadata.clone());
// Insert the redirect into the index.
if let Some(precise) = precise {
self.index.insert_redirect(CanonicalUrl::new(&url), precise);
}
metadata
}
}; };
let metadata = builder.download_and_build_metadata(&source).await?;
// Determine the appropriate requirements to return based on the extras. This involves // Determine the appropriate requirements to return based on the extras. This involves
// evaluating the `extras` expression in any markers, but preserving the remaining marker // evaluating the `extras` expression in any markers, but preserving the remaining marker
// conditions. // conditions.

View file

@ -1,72 +1,76 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use anyhow::{Context, Result}; use anyhow::Result;
use configparser::ini::Ini; use configparser::ini::Ini;
use futures::{StreamExt, TryStreamExt}; use futures::{StreamExt, TryStreamExt};
use serde::Deserialize; use serde::Deserialize;
use tracing::debug; use tracing::debug;
use cache_key::CanonicalUrl;
use distribution_filename::{SourceDistFilename, WheelFilename}; use distribution_filename::{SourceDistFilename, WheelFilename};
use distribution_types::{ use distribution_types::{
BuildableSource, DirectSourceUrl, GitSourceUrl, PathSourceUrl, RemoteSource, SourceUrl, BuildableSource, DirectSourceUrl, GitSourceUrl, PackageId, PathSourceUrl, RemoteSource,
SourceUrl,
}; };
use pep508_rs::{ use pep508_rs::{
Requirement, RequirementsTxtRequirement, Scheme, UnnamedRequirement, VersionOrUrl, Requirement, RequirementsTxtRequirement, Scheme, UnnamedRequirement, VersionOrUrl,
}; };
use pypi_types::Metadata10; use pypi_types::Metadata10;
use uv_client::RegistryClient; use uv_client::RegistryClient;
use uv_distribution::{Reporter, SourceDistributionBuilder}; use uv_distribution::{DistributionDatabase, Reporter};
use uv_normalize::PackageName; use uv_normalize::PackageName;
use uv_resolver::InMemoryIndex;
use uv_types::BuildContext; use uv_types::BuildContext;
/// Like [`RequirementsSpecification`], but with concrete names for all requirements. /// Like [`RequirementsSpecification`], but with concrete names for all requirements.
pub struct NamedRequirementsResolver { pub struct NamedRequirementsResolver<'a, Context: BuildContext + Send + Sync> {
/// The requirements for the project. /// The requirements for the project.
requirements: Vec<RequirementsTxtRequirement>, requirements: Vec<RequirementsTxtRequirement>,
/// The reporter to use when building source distributions. /// The in-memory index for resolving dependencies.
reporter: Option<Arc<dyn Reporter>>, index: &'a InMemoryIndex,
/// The database for fetching and building distributions.
database: DistributionDatabase<'a, Context>,
} }
impl NamedRequirementsResolver { impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Context> {
/// Instantiate a new [`NamedRequirementsResolver`] for a given set of requirements. /// Instantiate a new [`NamedRequirementsResolver`] for a given set of requirements.
pub fn new(requirements: Vec<RequirementsTxtRequirement>) -> Self { pub fn new(
requirements: Vec<RequirementsTxtRequirement>,
context: &'a Context,
client: &'a RegistryClient,
index: &'a InMemoryIndex,
) -> Self {
Self { Self {
requirements, requirements,
reporter: None, index,
database: DistributionDatabase::new(client, context),
} }
} }
/// Set the [`Reporter`] to use for this resolver. /// Set the [`Reporter`] to use for this resolver.
#[must_use] #[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self { pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
let reporter: Arc<dyn Reporter> = Arc::new(reporter);
Self { Self {
reporter: Some(reporter), database: self.database.with_reporter(reporter),
..self ..self
} }
} }
/// Resolve any unnamed requirements in the specification. /// Resolve any unnamed requirements in the specification.
pub async fn resolve<T: BuildContext>( pub async fn resolve(self) -> Result<Vec<Requirement>> {
self, let Self {
context: &T, requirements,
client: &RegistryClient, index,
) -> Result<Vec<Requirement>> { database,
futures::stream::iter(self.requirements) } = self;
futures::stream::iter(requirements)
.map(|requirement| async { .map(|requirement| async {
match requirement { match requirement {
RequirementsTxtRequirement::Pep508(requirement) => Ok(requirement), RequirementsTxtRequirement::Pep508(requirement) => Ok(requirement),
RequirementsTxtRequirement::Unnamed(requirement) => { RequirementsTxtRequirement::Unnamed(requirement) => {
Self::resolve_requirement( Self::resolve_requirement(requirement, index, &database).await
requirement,
context,
client,
self.reporter.clone(),
)
.await
} }
} }
}) })
@ -76,11 +80,10 @@ impl NamedRequirementsResolver {
} }
/// Infer the package name for a given "unnamed" requirement. /// Infer the package name for a given "unnamed" requirement.
async fn resolve_requirement<T: BuildContext>( async fn resolve_requirement(
requirement: UnnamedRequirement, requirement: UnnamedRequirement,
context: &T, index: &InMemoryIndex,
client: &RegistryClient, database: &DistributionDatabase<'a, Context>,
reporter: Option<Arc<dyn Reporter>>,
) -> Result<Requirement> { ) -> Result<Requirement> {
// If the requirement is a wheel, extract the package name from the wheel filename. // If the requirement is a wheel, extract the package name from the wheel filename.
// //
@ -231,20 +234,33 @@ impl NamedRequirementsResolver {
} }
}; };
// Run the PEP 517 build process to extract metadata from the source distribution. // Fetch the metadata for the distribution.
let builder = if let Some(reporter) = reporter { let name = {
SourceDistributionBuilder::new(client, context).with_reporter(reporter) let id = PackageId::from_url(source.url());
} else { if let Some(metadata) = index.get_metadata(&id) {
SourceDistributionBuilder::new(client, context) // If the metadata is already in the index, return it.
metadata.name.clone()
} else {
// Run the PEP 517 build process to extract metadata from the source distribution.
let source = BuildableSource::Url(source);
let (metadata, precise) = database.build_wheel_metadata(&source).await?;
let name = metadata.name.clone();
// Insert the metadata into the index.
index.insert_metadata(id, metadata);
// Insert the redirect into the index.
if let Some(precise) = precise {
index.insert_redirect(CanonicalUrl::new(&requirement.url), precise);
}
name
}
}; };
let metadata = builder
.download_and_build_metadata(&BuildableSource::Url(source))
.await
.context("Failed to build source distribution")?;
Ok(Requirement { Ok(Requirement {
name: metadata.name, name,
extras: requirement.extras, extras: requirement.extras,
version_or_url: Some(VersionOrUrl::Url(requirement.url)), version_or_url: Some(VersionOrUrl::Url(requirement.url)),
marker: requirement.marker, marker: requirement.marker,

View file

@ -263,18 +263,29 @@ pub(crate) async fn pip_compile(
// Resolve the requirements from the provided sources. // Resolve the requirements from the provided sources.
let requirements = { let requirements = {
// Convert from unnamed to named requirements. // Convert from unnamed to named requirements.
let mut requirements = NamedRequirementsResolver::new(requirements) let mut requirements = NamedRequirementsResolver::new(
.with_reporter(ResolverReporter::from(printer)) requirements,
.resolve(&build_dispatch, &client) &build_dispatch,
.await?; &client,
&top_level_index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve()
.await?;
// Resolve any source trees into requirements. // Resolve any source trees into requirements.
if !source_trees.is_empty() { if !source_trees.is_empty() {
requirements.extend( requirements.extend(
SourceTreeResolver::new(source_trees, &extras) SourceTreeResolver::new(
.with_reporter(ResolverReporter::from(printer)) source_trees,
.resolve(&build_dispatch, &client) &extras,
.await?, &build_dispatch,
&client,
&top_level_index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve()
.await?,
); );
} }

View file

@ -244,17 +244,18 @@ pub(crate) async fn pip_install(
// Resolve the requirements from the provided sources. // Resolve the requirements from the provided sources.
let requirements = { let requirements = {
// Convert from unnamed to named requirements. // Convert from unnamed to named requirements.
let mut requirements = NamedRequirementsResolver::new(requirements) let mut requirements =
.with_reporter(ResolverReporter::from(printer)) NamedRequirementsResolver::new(requirements, &resolve_dispatch, &client, &index)
.resolve(&resolve_dispatch, &client) .with_reporter(ResolverReporter::from(printer))
.await?; .resolve()
.await?;
// Resolve any source trees into requirements. // Resolve any source trees into requirements.
if !source_trees.is_empty() { if !source_trees.is_empty() {
requirements.extend( requirements.extend(
SourceTreeResolver::new(source_trees, extras) SourceTreeResolver::new(source_trees, extras, &resolve_dispatch, &client, &index)
.with_reporter(ResolverReporter::from(printer)) .with_reporter(ResolverReporter::from(printer))
.resolve(&resolve_dispatch, &client) .resolve()
.await?, .await?,
); );
} }

View file

@ -194,18 +194,25 @@ pub(crate) async fn pip_sync(
// Convert from unnamed to named requirements. // Convert from unnamed to named requirements.
let requirements = { let requirements = {
// Convert from unnamed to named requirements. // Convert from unnamed to named requirements.
let mut requirements = NamedRequirementsResolver::new(requirements) let mut requirements =
.with_reporter(ResolverReporter::from(printer)) NamedRequirementsResolver::new(requirements, &build_dispatch, &client, &index)
.resolve(&build_dispatch, &client) .with_reporter(ResolverReporter::from(printer))
.await?; .resolve()
.await?;
// Resolve any source trees into requirements. // Resolve any source trees into requirements.
if !source_trees.is_empty() { if !source_trees.is_empty() {
requirements.extend( requirements.extend(
SourceTreeResolver::new(source_trees, &ExtrasSpecification::None) SourceTreeResolver::new(
.with_reporter(ResolverReporter::from(printer)) source_trees,
.resolve(&build_dispatch, &client) &ExtrasSpecification::None,
.await?, &build_dispatch,
&client,
&index,
)
.with_reporter(ResolverReporter::from(printer))
.resolve()
.await?,
); );
} }

View file

@ -1354,6 +1354,33 @@ fn compile_git_concurrent_access() -> Result<()> {
Ok(()) Ok(())
} }
/// Resolve two packages from a `requirements.in` file with the same Git HTTPS dependency.
#[test]
#[cfg(feature = "git")]
fn compile_git_unnamed_concurrent_access() -> Result<()> {
let context = TestContext::new("3.12");
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in
.write_str("git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a\ngit+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b")?;
uv_snapshot!(context.compile()
.arg("requirements.in"), @r###"
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by uv via the following command:
# uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2024-03-25T00:00:00Z requirements.in
example-pkg-a @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_a
example-pkg-b @ git+https://github.com/pypa/sample-namespace-packages.git@df7530eeb8fa0cb7dbb8ecb28363e8e36bfa2f45#subdirectory=pkg_resources/pkg_b
----- stderr -----
Resolved 2 packages in [TIME]
"###
);
Ok(())
}
/// Resolve a Git dependency with a declared name that differs from the true name of the package. /// Resolve a Git dependency with a declared name that differs from the true name of the package.
#[test] #[test]
#[cfg(feature = "git")] #[cfg(feature = "git")]