uv/crates/uv-distribution/src/git.rs
2024-04-23 14:51:23 +00:00

304 lines
11 KiB
Rust

use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use anyhow::Result;
use fs_err::tokio as fs;
use once_cell::sync::Lazy;
use rustc_hash::FxHashMap;
use tracing::debug;
use url::Url;
use cache_key::{CanonicalUrl, RepositoryUrl};
use distribution_types::ParsedGitUrl;
use uv_cache::{Cache, CacheBucket};
use uv_fs::LockedFile;
use uv_git::{Fetch, GitReference, GitSha, GitSource, GitUrl};
use crate::error::Error;
use crate::reporter::Facade;
use crate::Reporter;
/// Global cache of resolved Git references.
///
/// Used to ensure that a given Git URL is only resolved once, and that the resolved URL is
/// consistent across all invocations. (For example: if a Git URL refers to a branch, like `main`,
/// then the resolved URL should always refer to the same commit across the lifetime of the
/// process.)
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<RepositoryReference, GitSha>>> =
Lazy::new(Mutex::default);
/// Lookup key for [`RESOLVED_GIT_REFS`]: a repository paired with the reference requested in it.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct RepositoryReference {
    /// Repository URL, stripped of query parameters and fragments.
    url: RepositoryUrl,
    /// Requested reference within the repository: a branch, a tag, or a revision.
    reference: GitReference,
}
impl RepositoryReference {
    /// Build the key for `git` from its repository URL and a copy of its reference.
    fn new(git: &GitUrl) -> Self {
        let url = RepositoryUrl::new(git.repository());
        let reference = git.reference().clone();
        Self { url, reference }
    }
}
/// Download a source distribution from a Git repository.
///
/// Assumes that the URL is a precise Git URL, with a full commit hash.
pub(crate) async fn fetch_git_archive(
url: &Url,
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<(Fetch, Option<PathBuf>), Error> {
debug!("Fetching source distribution from Git: {url}");
let git_dir = cache.bucket(CacheBucket::Git);
// Avoid races between different processes, too.
let lock_dir = git_dir.join("locks");
fs::create_dir_all(&lock_dir)
.await
.map_err(Error::CacheWrite)?;
let repository_url = RepositoryUrl::new(url);
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&repository_url)),
&repository_url,
)
.map_err(Error::CacheWrite)?;
let ParsedGitUrl { url, subdirectory } = ParsedGitUrl::try_from(url).map_err(Box::new)?;
// Fetch the Git repository.
let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
} else {
GitSource::new(url.clone(), git_dir)
};
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;
Ok((fetch, subdirectory))
}
/// Given a remote source distribution, return a precise variant, if possible.
///
/// For example, given a Git dependency with a reference to a branch or tag, return a URL
/// with a precise reference to the current commit of that branch or tag.
///
/// This method takes into account various normalizations that are independent from the Git
/// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+`
/// prefix kinds.
pub(crate) async fn resolve_precise(
url: &Url,
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<Option<Url>, Error> {
let ParsedGitUrl { url, subdirectory } = ParsedGitUrl::try_from(url).map_err(Box::new)?;
// If the Git reference already contains a complete SHA, short-circuit.
if url.precise().is_some() {
return Ok(None);
}
// If the Git reference is in the in-memory cache, return it.
{
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
if let Some(precise) = resolved_git_refs.get(&reference) {
return Ok(Some(Url::from(ParsedGitUrl {
url: url.with_precise(*precise),
subdirectory,
})));
}
}
let git_dir = cache.bucket(CacheBucket::Git);
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
// commit, etc.).
let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), git_dir).with_reporter(Facade::from(reporter.clone()))
} else {
GitSource::new(url.clone(), git_dir)
};
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;
let git = fetch.into_git();
// Insert the resolved URL into the in-memory cache.
if let Some(precise) = git.precise() {
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
resolved_git_refs.insert(reference, precise);
}
// Re-encode as a URL.
Ok(Some(Url::from(ParsedGitUrl {
url: git,
subdirectory,
})))
}
/// Look up the precise variant of a Git URL without touching the network.
///
/// For example, a Git dependency that references a branch or a tag is turned into a URL that
/// references the commit recorded for that branch or tag.
///
/// Normalizations that are independent of the Git layer are taken into account, such as
/// `#subdirectory=pkg_dir`-like fragments and `git+` prefix kinds.
///
/// Only URLs that were previously resolved via [`resolve_precise`] yield a result; `None` is
/// returned for anything else, including URLs that cannot be parsed as Git URLs.
///
/// # Panics
///
/// Panics if the [`RESOLVED_GIT_REFS`] mutex is poisoned.
pub fn to_precise(url: &Url) -> Option<Url> {
    let ParsedGitUrl {
        url: git_url,
        subdirectory,
    } = ParsedGitUrl::try_from(url).ok()?;

    // Copy the SHA out so that the lock is released before the URL is re-encoded.
    let key = RepositoryReference::new(&git_url);
    let sha = RESOLVED_GIT_REFS.lock().unwrap().get(&key).copied()?;

    let pinned = ParsedGitUrl {
        url: git_url.with_precise(sha),
        subdirectory,
    };
    Some(Url::from(pinned))
}
/// Returns `true` if both URLs point at the same Git commit.
///
/// For example, one URL could name a branch or tag while the other names a precise commit hash.
/// References are compared using the resolutions recorded in [`RESOLVED_GIT_REFS`].
///
/// # Panics
///
/// Panics if the [`RESOLVED_GIT_REFS`] mutex is poisoned.
pub fn is_same_reference<'a>(a: &'a Url, b: &'a Url) -> bool {
    is_same_reference_impl(a, b, &RESOLVED_GIT_REFS.lock().unwrap())
}
/// Returns `true` if the URLs refer to the same Git commit.
///
/// Like [`is_same_reference`], but accepts a resolved reference cache for testing.
fn is_same_reference_impl<'a>(
a: &'a Url,
b: &'a Url,
resolved_refs: &FxHashMap<RepositoryReference, GitSha>,
) -> bool {
// Convert `a` to a Git URL, if possible.
let Ok(a_git) = ParsedGitUrl::try_from(&Url::from(CanonicalUrl::new(a))) else {
return false;
};
// Convert `b` to a Git URL, if possible.
let Ok(b_git) = ParsedGitUrl::try_from(&Url::from(CanonicalUrl::new(b))) else {
return false;
};
// The URLs must refer to the same subdirectory, if any.
if a_git.subdirectory != b_git.subdirectory {
return false;
}
// Convert `a` to a repository URL.
let a_ref = RepositoryReference::new(&a_git.url);
// Convert `b` to a repository URL.
let b_ref = RepositoryReference::new(&b_git.url);
// The URLs must refer to the same repository.
if a_ref.url != b_ref.url {
return false;
}
// If the URLs have the same tag, they refer to the same commit.
if a_ref.reference == b_ref.reference {
return true;
}
// Otherwise, the URLs must resolve to the same precise commit.
let Some(a_precise) = a_git
.url
.precise()
.or_else(|| resolved_refs.get(&a_ref).copied())
else {
return false;
};
let Some(b_precise) = b_git
.url
.precise()
.or_else(|| resolved_refs.get(&b_ref).copied())
else {
return false;
};
a_precise == b_precise
}
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use anyhow::Result;
    use rustc_hash::FxHashMap;
    use url::Url;

    use crate::git::RepositoryReference;
    use uv_git::{GitSha, GitUrl};

    /// Build a resolution cache whose only entry maps `MyProject@main` to `sha`.
    fn main_resolved_to(sha: &str) -> Result<FxHashMap<RepositoryReference, GitSha>> {
        let branch = GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?;
        let mut refs = FxHashMap::default();
        refs.insert(RepositoryReference::new(&branch), GitSha::from_str(sha)?);
        Ok(refs)
    }

    #[test]
    fn same_reference() -> Result<()> {
        // Scenarios that are decided without any resolved references.
        let unresolved = FxHashMap::default();
        let cases = [
            // Same repository, same tag.
            (
                "git+https://example.com/MyProject.git@main",
                "git+https://example.com/MyProject.git@main",
                true,
            ),
            // Same repository, same tag, same subdirectory.
            (
                "git+https://example.com/MyProject.git@main#subdirectory=pkg_dir",
                "git+https://example.com/MyProject.git@main#subdirectory=pkg_dir",
                true,
            ),
            // Different repositories, same tag.
            (
                "git+https://example.com/MyProject.git@main",
                "git+https://example.com/MyOtherProject.git@main",
                false,
            ),
            // Same repository, different tags.
            (
                "git+https://example.com/MyProject.git@main",
                "git+https://example.com/MyProject.git@v1.0",
                false,
            ),
            // Same repository, same tag, different subdirectory.
            (
                "git+https://example.com/MyProject.git@main#subdirectory=pkg_dir",
                "git+https://example.com/MyProject.git@main#subdirectory=other_dir",
                false,
            ),
        ];
        for (lhs, rhs, expected) in cases {
            let lhs = Url::parse(lhs)?;
            let rhs = Url::parse(rhs)?;
            assert_eq!(
                super::is_same_reference_impl(&lhs, &rhs, &unresolved),
                expected,
                "{lhs} vs. {rhs}"
            );
        }

        // The remaining scenarios compare a branch against a precise commit.
        let branch = Url::parse("git+https://example.com/MyProject.git@main")?;
        let commit = Url::parse(
            "git+https://example.com/MyProject.git@164a8735b081663fede48c5041667b194da15d25",
        )?;

        // Same repository, different tags, but same precise commit.
        let resolved_refs = main_resolved_to("164a8735b081663fede48c5041667b194da15d25")?;
        assert!(super::is_same_reference_impl(
            &branch,
            &commit,
            &resolved_refs
        ));

        // Same repository, different tags, different precise commit.
        let resolved_refs = main_resolved_to("f2c9e88f3ec9526bbcec68d150b176d96a750aba")?;
        assert!(!super::is_same_reference_impl(
            &branch,
            &commit,
            &resolved_refs
        ));

        Ok(())
    }
}