mirror of
https://github.com/astral-sh/uv.git
synced 2025-10-29 03:02:55 +00:00
304 lines
11 KiB
Rust
304 lines
11 KiB
Rust
use std::path::PathBuf;
|
|
use std::sync::{Arc, Mutex};
|
|
|
|
use anyhow::Result;
|
|
use fs_err::tokio as fs;
|
|
use once_cell::sync::Lazy;
|
|
use rustc_hash::FxHashMap;
|
|
use tracing::debug;
|
|
use url::Url;
|
|
|
|
use cache_key::{CanonicalUrl, RepositoryUrl};
|
|
use distribution_types::ParsedGitUrl;
|
|
use uv_cache::{Cache, CacheBucket};
|
|
use uv_fs::LockedFile;
|
|
use uv_git::{Fetch, GitReference, GitSha, GitSource, GitUrl};
|
|
|
|
use crate::error::Error;
|
|
use crate::reporter::Facade;
|
|
use crate::Reporter;
|
|
|
|
/// Global cache of resolved Git references.
|
|
///
|
|
/// Used to ensure that a given Git URL is only resolved once, and that the resolved URL is
|
|
/// consistent across all invocations. (For example: if a Git URL refers to a branch, like `main`,
|
|
/// then the resolved URL should always refer to the same commit across the lifetime of the
|
|
/// process.)
|
|
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<RepositoryReference, GitSha>>> =
|
|
Lazy::new(Mutex::default);
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
|
struct RepositoryReference {
|
|
/// The URL of the Git repository, with any query parameters and fragments removed.
|
|
url: RepositoryUrl,
|
|
/// The reference to the commit to use, which could be a branch, tag or revision.
|
|
reference: GitReference,
|
|
}
|
|
|
|
impl RepositoryReference {
|
|
fn new(git: &GitUrl) -> Self {
|
|
Self {
|
|
url: RepositoryUrl::new(git.repository()),
|
|
reference: git.reference().clone(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Download a source distribution from a Git repository.
|
|
///
|
|
/// Assumes that the URL is a precise Git URL, with a full commit hash.
|
|
pub(crate) async fn fetch_git_archive(
|
|
url: &Url,
|
|
cache: &Cache,
|
|
reporter: Option<&Arc<dyn Reporter>>,
|
|
) -> Result<(Fetch, Option<PathBuf>), Error> {
|
|
debug!("Fetching source distribution from Git: {url}");
|
|
let git_dir = cache.bucket(CacheBucket::Git);
|
|
|
|
// Avoid races between different processes, too.
|
|
let lock_dir = git_dir.join("locks");
|
|
fs::create_dir_all(&lock_dir)
|
|
.await
|
|
.map_err(Error::CacheWrite)?;
|
|
let repository_url = RepositoryUrl::new(url);
|
|
let _lock = LockedFile::acquire(
|
|
lock_dir.join(cache_key::digest(&repository_url)),
|
|
&repository_url,
|
|
)
|
|
.map_err(Error::CacheWrite)?;
|
|
|
|
let ParsedGitUrl { url, subdirectory } = ParsedGitUrl::try_from(url).map_err(Box::new)?;
|
|
|
|
// Fetch the Git repository.
|
|
let source = if let Some(reporter) = reporter {
|
|
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
|
|
} else {
|
|
GitSource::new(url.clone(), git_dir)
|
|
};
|
|
let fetch = tokio::task::spawn_blocking(move || source.fetch())
|
|
.await?
|
|
.map_err(Error::Git)?;
|
|
|
|
Ok((fetch, subdirectory))
|
|
}
|
|
|
|
/// Given a remote source distribution, return a precise variant, if possible.
|
|
///
|
|
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
|
|
/// with a precise reference to the current commit of that branch or tag.
|
|
///
|
|
/// This method takes into account various normalizations that are independent from the Git
|
|
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
|
|
/// prefix kinds.
|
|
pub(crate) async fn resolve_precise(
|
|
url: &Url,
|
|
cache: &Cache,
|
|
reporter: Option<&Arc<dyn Reporter>>,
|
|
) -> Result<Option<Url>, Error> {
|
|
let ParsedGitUrl { url, subdirectory } = ParsedGitUrl::try_from(url).map_err(Box::new)?;
|
|
|
|
// If the Git reference already contains a complete SHA, short-circuit.
|
|
if url.precise().is_some() {
|
|
return Ok(None);
|
|
}
|
|
|
|
// If the Git reference is in the in-memory cache, return it.
|
|
{
|
|
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
|
let reference = RepositoryReference::new(&url);
|
|
if let Some(precise) = resolved_git_refs.get(&reference) {
|
|
return Ok(Some(Url::from(ParsedGitUrl {
|
|
url: url.with_precise(*precise),
|
|
subdirectory,
|
|
})));
|
|
}
|
|
}
|
|
|
|
let git_dir = cache.bucket(CacheBucket::Git);
|
|
|
|
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
|
|
// commit, etc.).
|
|
let source = if let Some(reporter) = reporter {
|
|
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
|
|
} else {
|
|
GitSource::new(url.clone(), git_dir)
|
|
};
|
|
let fetch = tokio::task::spawn_blocking(move || source.fetch())
|
|
.await?
|
|
.map_err(Error::Git)?;
|
|
let git = fetch.into_git();
|
|
|
|
// Insert the resolved URL into the in-memory cache.
|
|
if let Some(precise) = git.precise() {
|
|
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
|
let reference = RepositoryReference::new(&url);
|
|
resolved_git_refs.insert(reference, precise);
|
|
}
|
|
|
|
// Re-encode as a URL.
|
|
Ok(Some(Url::from(ParsedGitUrl {
|
|
url: git,
|
|
subdirectory,
|
|
})))
|
|
}
|
|
|
|
/// Given a remote source distribution, return a precise variant, if possible.
|
|
///
|
|
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
|
|
/// with a precise reference to the current commit of that branch or tag.
|
|
///
|
|
/// This method takes into account various normalizations that are independent from the Git
|
|
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
|
|
/// prefix kinds.
|
|
///
|
|
/// This method will only return precise URLs for URLs that have already been resolved via
|
|
/// [`resolve_precise`].
|
|
pub fn to_precise(url: &Url) -> Option<Url> {
|
|
let ParsedGitUrl { url, subdirectory } = ParsedGitUrl::try_from(url).ok()?;
|
|
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
|
let reference = RepositoryReference::new(&url);
|
|
let precise = resolved_git_refs.get(&reference)?;
|
|
Some(Url::from(ParsedGitUrl {
|
|
url: url.with_precise(*precise),
|
|
subdirectory,
|
|
}))
|
|
}
|
|
|
|
/// Returns `true` if the URLs refer to the same Git commit.
|
|
///
|
|
/// For example, the previous URL could be a branch or tag, while the current URL would be a
|
|
/// precise commit hash.
|
|
pub fn is_same_reference<'a>(a: &'a Url, b: &'a Url) -> bool {
|
|
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
|
|
is_same_reference_impl(a, b, &resolved_git_refs)
|
|
}
|
|
|
|
/// Returns `true` if the URLs refer to the same Git commit.
|
|
///
|
|
/// Like [`is_same_reference`], but accepts a resolved reference cache for testing.
|
|
fn is_same_reference_impl<'a>(
|
|
a: &'a Url,
|
|
b: &'a Url,
|
|
resolved_refs: &FxHashMap<RepositoryReference, GitSha>,
|
|
) -> bool {
|
|
// Convert `a` to a Git URL, if possible.
|
|
let Ok(a_git) = ParsedGitUrl::try_from(&Url::from(CanonicalUrl::new(a))) else {
|
|
return false;
|
|
};
|
|
|
|
// Convert `b` to a Git URL, if possible.
|
|
let Ok(b_git) = ParsedGitUrl::try_from(&Url::from(CanonicalUrl::new(b))) else {
|
|
return false;
|
|
};
|
|
|
|
// The URLs must refer to the same subdirectory, if any.
|
|
if a_git.subdirectory != b_git.subdirectory {
|
|
return false;
|
|
}
|
|
|
|
// Convert `a` to a repository URL.
|
|
let a_ref = RepositoryReference::new(&a_git.url);
|
|
|
|
// Convert `b` to a repository URL.
|
|
let b_ref = RepositoryReference::new(&b_git.url);
|
|
|
|
// The URLs must refer to the same repository.
|
|
if a_ref.url != b_ref.url {
|
|
return false;
|
|
}
|
|
|
|
// If the URLs have the same tag, they refer to the same commit.
|
|
if a_ref.reference == b_ref.reference {
|
|
return true;
|
|
}
|
|
|
|
// Otherwise, the URLs must resolve to the same precise commit.
|
|
let Some(a_precise) = a_git
|
|
.url
|
|
.precise()
|
|
.or_else(|| resolved_refs.get(&a_ref).copied())
|
|
else {
|
|
return false;
|
|
};
|
|
|
|
let Some(b_precise) = b_git
|
|
.url
|
|
.precise()
|
|
.or_else(|| resolved_refs.get(&b_ref).copied())
|
|
else {
|
|
return false;
|
|
};
|
|
|
|
a_precise == b_precise
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use anyhow::Result;
    use rustc_hash::FxHashMap;
    use std::str::FromStr;
    use url::Url;

    use crate::git::RepositoryReference;
    use uv_git::{GitSha, GitUrl};

    /// Build a resolved-reference cache in which `MyProject@main` is pinned to `sha`.
    fn main_resolved_to(sha: &str) -> Result<FxHashMap<RepositoryReference, GitSha>> {
        let main = GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?;
        let mut resolved_refs = FxHashMap::default();
        resolved_refs.insert(RepositoryReference::new(&main), GitSha::from_str(sha)?);
        Ok(resolved_refs)
    }

    #[test]
    fn same_reference() -> Result<()> {
        let empty = FxHashMap::default();

        // Comparisons that do not depend on any previously resolved reference.
        let cases = [
            // Same repository, same tag.
            (
                "git+https://example.com/MyProject.git@main",
                "git+https://example.com/MyProject.git@main",
                true,
            ),
            // Same repository, same tag, same subdirectory.
            (
                "git+https://example.com/MyProject.git@main#subdirectory=pkg_dir",
                "git+https://example.com/MyProject.git@main#subdirectory=pkg_dir",
                true,
            ),
            // Different repositories, same tag.
            (
                "git+https://example.com/MyProject.git@main",
                "git+https://example.com/MyOtherProject.git@main",
                false,
            ),
            // Same repository, different tags.
            (
                "git+https://example.com/MyProject.git@main",
                "git+https://example.com/MyProject.git@v1.0",
                false,
            ),
            // Same repository, same tag, different subdirectory.
            (
                "git+https://example.com/MyProject.git@main#subdirectory=pkg_dir",
                "git+https://example.com/MyProject.git@main#subdirectory=other_dir",
                false,
            ),
        ];
        for (a, b, expected) in cases {
            let (a, b) = (Url::parse(a)?, Url::parse(b)?);
            assert_eq!(
                super::is_same_reference_impl(&a, &b, &empty),
                expected,
                "{a} vs. {b}"
            );
        }

        // The remaining cases compare a branch against a precise commit.
        let a = Url::parse("git+https://example.com/MyProject.git@main")?;
        let b = Url::parse(
            "git+https://example.com/MyProject.git@164a8735b081663fede48c5041667b194da15d25",
        )?;

        // Same repository, different tags, but same precise commit.
        let resolved_refs = main_resolved_to("164a8735b081663fede48c5041667b194da15d25")?;
        assert!(super::is_same_reference_impl(&a, &b, &resolved_refs));

        // Same repository, different tags, different precise commit.
        let resolved_refs = main_resolved_to("f2c9e88f3ec9526bbcec68d150b176d96a750aba")?;
        assert!(!super::is_same_reference_impl(&a, &b, &resolved_refs));

        Ok(())
    }
}
|