Add an in-memory cache for Git references (#2682)

## Summary

Ensures that, even if we try to resolve the same Git reference twice
within an invocation, it always returns a (cached) consistent result.

Closes https://github.com/astral-sh/uv/issues/2673.

## Test Plan

```
❯ cargo run pip install git+https://github.com/pallets/flask.git --reinstall --no-cache
   Compiling uv-distribution v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-distribution)
   Compiling uv-resolver v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-resolver)
   Compiling uv-installer v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-installer)
   Compiling uv-dispatch v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-dispatch)
   Compiling uv-requirements v0.1.0 (/Users/crmarsh/workspace/uv/crates/uv-requirements)
   Compiling uv v0.1.24 (/Users/crmarsh/workspace/uv/crates/uv)
    Finished dev [unoptimized + debuginfo] target(s) in 3.95s
     Running `target/debug/uv pip install 'git+https://github.com/pallets/flask.git' --reinstall --no-cache`
 Updated https://github.com/pallets/flask.git (b90a4f1)
Resolved 7 packages in 280ms
   Built flask @ git+https://github.com/pallets/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe
Downloaded 7 packages in 212ms
Installed 7 packages in 9ms
```
This commit is contained in:
Charlie Marsh 2024-03-26 21:39:01 -04:00 committed by GitHub
parent 32d8ee8ba3
commit ffd78d0821
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 161 additions and 86 deletions

View file

@ -11,19 +11,17 @@ use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{
BuildableSource, BuiltDist, DirectGitUrl, Dist, FileLocation, IndexLocations, LocalEditable,
Name, SourceDist,
BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name,
};
use platform_tags::Tags;
use pypi_types::Metadata23;
use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, CacheEntry, WheelCache};
use uv_client::{CacheControl, CachedClientError, Connectivity, RegistryClient};
use uv_git::GitSource;
use uv_types::{BuildContext, NoBinary, NoBuild};
use crate::download::{BuiltWheel, UnzippedWheel};
use crate::git::resolve_precise;
use crate::locks::Locks;
use crate::reporter::Facade;
use crate::{DiskWheel, Error, LocalWheel, Reporter, SourceDistCachedBuilder};
/// A cached high-level interface to convert distributions (a requirement resolved to a location)
@ -356,7 +354,12 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let _guard = lock.lock().await;
// Insert the `precise` URL, if it exists.
let precise = self.precise(source_dist).await?;
let precise = resolve_precise(
source_dist,
self.build_context.cache(),
self.reporter.as_ref(),
)
.await?;
let source_dist = match precise.as_ref() {
Some(url) => Cow::Owned(source_dist.clone().with_url(url.clone())),
@ -393,44 +396,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
Ok((LocalWheel::Built(built_wheel), metadata))
}
/// Given a remote source distribution, return a precise variant, if possible.
///
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
/// with a precise reference to the current commit of that branch or tag.
///
/// This method takes into account various normalizations that are independent from the Git
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
/// prefix kinds.
async fn precise(&self, dist: &SourceDist) -> Result<Option<Url>, Error> {
let SourceDist::Git(source_dist) = dist else {
return Ok(None);
};
let git_dir = self.build_context.cache().bucket(CacheBucket::Git);
let DirectGitUrl { url, subdirectory } =
DirectGitUrl::try_from(source_dist.url.raw()).map_err(Error::Git)?;
// If the commit already contains a complete SHA, short-circuit.
if url.precise().is_some() {
return Ok(None);
}
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
// commit, etc.).
let source = if let Some(reporter) = self.reporter.clone() {
GitSource::new(url, git_dir).with_reporter(Facade::from(reporter))
} else {
GitSource::new(url, git_dir)
};
let precise = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;
let url = precise.into_git();
// Re-encode as a URL.
Ok(Some(Url::from(DirectGitUrl { url, subdirectory })))
}
/// Stream a wheel from a URL, unzipping it into the cache as it's downloaded.
async fn stream_wheel(
&self,