mirror of
https://github.com/astral-sh/uv.git
synced 2025-07-07 21:35:00 +00:00
Add an in-memory cache for Git references (#2682)
## Summary Ensures that, even if we try to resolve the same Git reference twice within an invocation, it always returns a (cached) consistent result. Closes https://github.com/astral-sh/uv/issues/2673. ## Test Plan ``` ❯ cargo run pip install git+https://github.com/pallets/flask.git --reinstall --no-cache Compiling uv-distribution v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-distribution) Compiling uv-resolver v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-resolver) Compiling uv-installer v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-installer) Compiling uv-dispatch v0.0.1 (/Users/crmarsh/workspace/uv/crates/uv-dispatch) Compiling uv-requirements v0.1.0 (/Users/crmarsh/workspace/uv/crates/uv-requirements) Compiling uv v0.1.24 (/Users/crmarsh/workspace/uv/crates/uv) Finished dev [unoptimized + debuginfo] target(s) in 3.95s Running `target/debug/uv pip install 'git+https://github.com/pallets/flask.git' --reinstall --no-cache` Updated https://github.com/pallets/flask.git (b90a4f1) Resolved 7 packages in 280ms Built flask @ git+https://github.com/pallets/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe Downloaded 7 packages in 212ms Installed 7 packages in 9ms ```
This commit is contained in:
parent
32d8ee8ba3
commit
ffd78d0821
9 changed files with 161 additions and 86 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -4570,6 +4570,7 @@ dependencies = [
|
|||
"futures",
|
||||
"install-wheel-rs",
|
||||
"nanoid",
|
||||
"once_cell",
|
||||
"pep440_rs",
|
||||
"pep508_rs",
|
||||
"platform-tags",
|
||||
|
|
|
@ -20,6 +20,7 @@ install-wheel-rs = { workspace = true }
|
|||
pep440_rs = { workspace = true }
|
||||
pep508_rs = { workspace = true }
|
||||
platform-tags = { workspace = true }
|
||||
pypi-types = { workspace = true }
|
||||
uv-cache = { workspace = true }
|
||||
uv-client = { workspace = true }
|
||||
uv-extract = { workspace = true }
|
||||
|
@ -27,12 +28,12 @@ uv-fs = { workspace = true, features = ["tokio"] }
|
|||
uv-git = { workspace = true, features = ["vendored-openssl"] }
|
||||
uv-normalize = { workspace = true }
|
||||
uv-types = { workspace = true }
|
||||
pypi-types = { workspace = true }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
fs-err = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
nanoid = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
reqwest-middleware = { workspace = true }
|
||||
rmp-serde = { workspace = true }
|
||||
|
@ -45,3 +46,4 @@ tokio-util = { workspace = true, features = ["compat"] }
|
|||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
zip = { workspace = true }
|
||||
|
||||
|
|
|
@ -11,19 +11,17 @@ use url::Url;
|
|||
|
||||
use distribution_filename::WheelFilename;
|
||||
use distribution_types::{
|
||||
BuildableSource, BuiltDist, DirectGitUrl, Dist, FileLocation, IndexLocations, LocalEditable,
|
||||
Name, SourceDist,
|
||||
BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name,
|
||||
};
|
||||
use platform_tags::Tags;
|
||||
use pypi_types::Metadata23;
|
||||
use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, CacheEntry, WheelCache};
|
||||
use uv_client::{CacheControl, CachedClientError, Connectivity, RegistryClient};
|
||||
use uv_git::GitSource;
|
||||
use uv_types::{BuildContext, NoBinary, NoBuild};
|
||||
|
||||
use crate::download::{BuiltWheel, UnzippedWheel};
|
||||
use crate::git::resolve_precise;
|
||||
use crate::locks::Locks;
|
||||
use crate::reporter::Facade;
|
||||
use crate::{DiskWheel, Error, LocalWheel, Reporter, SourceDistCachedBuilder};
|
||||
|
||||
/// A cached high-level interface to convert distributions (a requirement resolved to a location)
|
||||
|
@ -356,7 +354,12 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
let _guard = lock.lock().await;
|
||||
|
||||
// Insert the `precise` URL, if it exists.
|
||||
let precise = self.precise(source_dist).await?;
|
||||
let precise = resolve_precise(
|
||||
source_dist,
|
||||
self.build_context.cache(),
|
||||
self.reporter.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let source_dist = match precise.as_ref() {
|
||||
Some(url) => Cow::Owned(source_dist.clone().with_url(url.clone())),
|
||||
|
@ -393,44 +396,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
|
|||
Ok((LocalWheel::Built(built_wheel), metadata))
|
||||
}
|
||||
|
||||
/// Given a remote source distribution, return a precise variant, if possible.
|
||||
///
|
||||
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
|
||||
/// with a precise reference to the current commit of that branch or tag.
|
||||
///
|
||||
/// This method takes into account various normalizations that are independent from the Git
|
||||
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
|
||||
/// prefix kinds.
|
||||
async fn precise(&self, dist: &SourceDist) -> Result<Option<Url>, Error> {
|
||||
let SourceDist::Git(source_dist) = dist else {
|
||||
return Ok(None);
|
||||
};
|
||||
let git_dir = self.build_context.cache().bucket(CacheBucket::Git);
|
||||
|
||||
let DirectGitUrl { url, subdirectory } =
|
||||
DirectGitUrl::try_from(source_dist.url.raw()).map_err(Error::Git)?;
|
||||
|
||||
// If the commit already contains a complete SHA, short-circuit.
|
||||
if url.precise().is_some() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
|
||||
// commit, etc.).
|
||||
let source = if let Some(reporter) = self.reporter.clone() {
|
||||
GitSource::new(url, git_dir).with_reporter(Facade::from(reporter))
|
||||
} else {
|
||||
GitSource::new(url, git_dir)
|
||||
};
|
||||
let precise = tokio::task::spawn_blocking(move || source.fetch())
|
||||
.await?
|
||||
.map_err(Error::Git)?;
|
||||
let url = precise.into_git();
|
||||
|
||||
// Re-encode as a URL.
|
||||
Ok(Some(Url::from(DirectGitUrl { url, subdirectory })))
|
||||
}
|
||||
|
||||
/// Stream a wheel from a URL, unzipping it into the cache as it's downloaded.
|
||||
async fn stream_wheel(
|
||||
&self,
|
||||
|
|
141
crates/uv-distribution/src/git.rs
Normal file
141
crates/uv-distribution/src/git.rs
Normal file
|
@ -0,0 +1,141 @@
|
|||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use anyhow::Result;
|
||||
use fs_err::tokio as fs;
|
||||
use once_cell::sync::Lazy;
|
||||
use rustc_hash::FxHashMap;
|
||||
use tracing::debug;
|
||||
use url::Url;
|
||||
|
||||
use distribution_types::{DirectGitUrl, SourceDist};
|
||||
use uv_cache::{Cache, CacheBucket};
|
||||
use uv_fs::LockedFile;
|
||||
use uv_git::{Fetch, GitSource, GitUrl};
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::reporter::Facade;
|
||||
use crate::Reporter;
|
||||
|
||||
/// Global cache of resolved Git references.
|
||||
///
|
||||
/// Used to ensure that a given Git URL is only resolved once, and that the resolved URL is
|
||||
/// consistent across all invocations. (For example: if a Git URL refers to a branch, like `main`,
|
||||
/// then the resolved URL should always refer to the same commit across the lifetime of the
|
||||
/// process.)
|
||||
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<GitUrl, GitUrl>>> = Lazy::new(Mutex::default);
|
||||
|
||||
/// Download a source distribution from a Git repository.
|
||||
pub(crate) async fn fetch_git_archive(
|
||||
url: &Url,
|
||||
cache: &Cache,
|
||||
reporter: Option<&Arc<dyn Reporter>>,
|
||||
) -> Result<(Fetch, Option<PathBuf>), Error> {
|
||||
debug!("Fetching source distribution from Git: {url}");
|
||||
let git_dir = cache.bucket(CacheBucket::Git);
|
||||
|
||||
// Avoid races between different processes, too.
|
||||
let lock_dir = git_dir.join("locks");
|
||||
fs::create_dir_all(&lock_dir)
|
||||
.await
|
||||
.map_err(Error::CacheWrite)?;
|
||||
let canonical_url = cache_key::CanonicalUrl::new(url);
|
||||
let _lock = LockedFile::acquire(
|
||||
lock_dir.join(cache_key::digest(&canonical_url)),
|
||||
&canonical_url,
|
||||
)
|
||||
.map_err(Error::CacheWrite)?;
|
||||
|
||||
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;
|
||||
|
||||
// Extract the resolved URL from the in-memory cache, to save a look-up in the fetch.
|
||||
let url = {
|
||||
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
||||
if let Some(resolved) = resolved_git_refs.get(&url) {
|
||||
resolved.clone()
|
||||
} else {
|
||||
url
|
||||
}
|
||||
};
|
||||
|
||||
// Fetch the Git repository.
|
||||
let source = if let Some(reporter) = reporter {
|
||||
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
|
||||
} else {
|
||||
GitSource::new(url.clone(), git_dir)
|
||||
};
|
||||
let fetch = tokio::task::spawn_blocking(move || source.fetch())
|
||||
.await?
|
||||
.map_err(Error::Git)?;
|
||||
|
||||
// Insert the resolved URL into the in-memory cache.
|
||||
{
|
||||
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
||||
let precise = fetch.git().clone();
|
||||
resolved_git_refs.insert(url, precise);
|
||||
}
|
||||
|
||||
Ok((fetch, subdirectory))
|
||||
}
|
||||
|
||||
/// Given a remote source distribution, return a precise variant, if possible.
|
||||
///
|
||||
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
|
||||
/// with a precise reference to the current commit of that branch or tag.
|
||||
///
|
||||
/// This method takes into account various normalizations that are independent from the Git
|
||||
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
|
||||
/// prefix kinds.
|
||||
pub(crate) async fn resolve_precise(
|
||||
dist: &SourceDist,
|
||||
cache: &Cache,
|
||||
reporter: Option<&Arc<dyn Reporter>>,
|
||||
) -> Result<Option<Url>, Error> {
|
||||
let SourceDist::Git(source_dist) = dist else {
|
||||
return Ok(None);
|
||||
};
|
||||
let git_dir = cache.bucket(CacheBucket::Git);
|
||||
|
||||
let DirectGitUrl { url, subdirectory } =
|
||||
DirectGitUrl::try_from(source_dist.url.raw()).map_err(Error::Git)?;
|
||||
|
||||
// If the Git reference already contains a complete SHA, short-circuit.
|
||||
if url.precise().is_some() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// If the Git reference is in the in-memory cache, return it.
|
||||
{
|
||||
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
||||
if let Some(precise) = resolved_git_refs.get(&url) {
|
||||
return Ok(Some(Url::from(DirectGitUrl {
|
||||
url: precise.clone(),
|
||||
subdirectory,
|
||||
})));
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
|
||||
// commit, etc.).
|
||||
let source = if let Some(reporter) = reporter {
|
||||
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
|
||||
} else {
|
||||
GitSource::new(url.clone(), git_dir)
|
||||
};
|
||||
let fetch = tokio::task::spawn_blocking(move || source.fetch())
|
||||
.await?
|
||||
.map_err(Error::Git)?;
|
||||
let precise = fetch.into_git();
|
||||
|
||||
// Insert the resolved URL into the in-memory cache.
|
||||
{
|
||||
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
||||
resolved_git_refs.insert(url.clone(), precise.clone());
|
||||
}
|
||||
|
||||
// Re-encode as a URL.
|
||||
Ok(Some(Url::from(DirectGitUrl {
|
||||
url: precise,
|
||||
subdirectory,
|
||||
})))
|
||||
}
|
|
@ -9,6 +9,7 @@ pub use unzip::Unzip;
|
|||
mod distribution_database;
|
||||
mod download;
|
||||
mod error;
|
||||
mod git;
|
||||
mod index;
|
||||
mod locks;
|
||||
mod reporter;
|
||||
|
|
|
@ -17,8 +17,8 @@ use zip::ZipArchive;
|
|||
|
||||
use distribution_filename::WheelFilename;
|
||||
use distribution_types::{
|
||||
BuildableSource, DirectArchiveUrl, DirectGitUrl, Dist, FileLocation, GitSourceUrl,
|
||||
LocalEditable, PathSourceDist, PathSourceUrl, RemoteSource, SourceDist, SourceUrl,
|
||||
BuildableSource, DirectArchiveUrl, Dist, FileLocation, GitSourceUrl, LocalEditable,
|
||||
PathSourceDist, PathSourceUrl, RemoteSource, SourceDist, SourceUrl,
|
||||
};
|
||||
use install_wheel_rs::metadata::read_archive_metadata;
|
||||
use pep508_rs::Scheme;
|
||||
|
@ -31,12 +31,11 @@ use uv_cache::{
|
|||
use uv_client::{
|
||||
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
|
||||
};
|
||||
use uv_fs::{write_atomic, LockedFile};
|
||||
use uv_git::{Fetch, GitSource};
|
||||
use uv_fs::write_atomic;
|
||||
use uv_types::{BuildContext, BuildKind, NoBuild, SourceBuildTrait};
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::reporter::Facade;
|
||||
use crate::git::fetch_git_archive;
|
||||
use crate::source::built_wheel_metadata::BuiltWheelMetadata;
|
||||
use crate::source::manifest::Manifest;
|
||||
use crate::Reporter;
|
||||
|
@ -1233,40 +1232,6 @@ async fn extract_archive(path: &Path, cache: &Cache) -> Result<ExtractedSource,
|
|||
}
|
||||
}
|
||||
|
||||
/// Download a source distribution from a Git repository.
|
||||
async fn fetch_git_archive(
|
||||
url: &Url,
|
||||
cache: &Cache,
|
||||
reporter: Option<&Arc<dyn Reporter>>,
|
||||
) -> Result<(Fetch, Option<PathBuf>), Error> {
|
||||
debug!("Fetching source distribution from Git: {url}");
|
||||
let git_dir = cache.bucket(CacheBucket::Git);
|
||||
|
||||
// Avoid races between different processes, too.
|
||||
let lock_dir = git_dir.join("locks");
|
||||
fs::create_dir_all(&lock_dir)
|
||||
.await
|
||||
.map_err(Error::CacheWrite)?;
|
||||
let canonical_url = cache_key::CanonicalUrl::new(url);
|
||||
let _lock = LockedFile::acquire(
|
||||
lock_dir.join(cache_key::digest(&canonical_url)),
|
||||
&canonical_url,
|
||||
)
|
||||
.map_err(Error::CacheWrite)?;
|
||||
|
||||
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;
|
||||
|
||||
let source = if let Some(reporter) = reporter {
|
||||
GitSource::new(url, git_dir).with_reporter(Facade::from(reporter.clone()))
|
||||
} else {
|
||||
GitSource::new(url, git_dir)
|
||||
};
|
||||
let fetch = tokio::task::spawn_blocking(move || source.fetch())
|
||||
.await?
|
||||
.map_err(Error::Git)?;
|
||||
Ok((fetch, subdirectory))
|
||||
}
|
||||
|
||||
/// Download and extract a source distribution from a URL.
|
||||
///
|
||||
/// This function will download the source distribution from the given URL, and extract it into a
|
||||
|
|
|
@ -23,7 +23,7 @@ use crate::FetchStrategy;
|
|||
const CHECKOUT_READY_LOCK: &str = ".ok";
|
||||
|
||||
/// A reference to commit or commit-ish.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub(crate) enum GitReference {
|
||||
/// From a branch.
|
||||
#[allow(unused)]
|
||||
|
|
|
@ -12,7 +12,7 @@ mod source;
|
|||
mod util;
|
||||
|
||||
/// A URL reference to a Git repository.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct GitUrl {
|
||||
/// The URL of the Git repository, with any query parameters and fragments removed.
|
||||
repository: Url,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use std::str::FromStr;
|
||||
|
||||
/// A complete Git SHA, i.e., a 40-character hexadecimal representation of a Git commit.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct GitSha(git2::Oid);
|
||||
|
||||
impl GitSha {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue