mirror of
https://github.com/astral-sh/uv.git
synced 2025-10-10 10:32:09 +00:00
Write fully-precise Git SHAs to pip-compile output (#299)
This PR adds a mechanism by which we can ensure that we _always_ try to refresh Git dependencies when resolving; further, we now write the fully resolved SHA to the "lockfile". However, nothing in the code _assumes_ we do this, so the installer will remain agnostic to this behavior.

The specific approach taken here is minimally invasive. Specifically, when we try to fetch a source distribution, we check if it's a Git dependency; if it is, we fetch, and return the exact SHA, which we then map back to a new URL. In the resolver, we keep track of URL "redirects", and then we use the redirect (1) for the actual source distribution building, and (2) when writing back out to the lockfile. As such, none of the types outside of the resolver change at all, since we're just mapping `RemoteDistribution` to `RemoteDistribution`, but swapping out the internal URLs.

There are some inefficiencies here since, e.g., we do the Git fetch, send back the "precise" URL, then a moment later, do a Git checkout of that URL (which will be _mostly_ a no-op -- since we have a full SHA, we don't have to fetch anything, but we _do_ check back on disk to see if the SHA is still checked out). A more efficient approach would be to return the path to the checked-out revision when we do this conversion to a "precise" URL, since we'd then only interact with the Git repo exactly once. But this runs the risk that the checked-out SHA changes between the time we make the "precise" URL and the time we build the source distribution.

Closes #286.
This commit is contained in:
parent
addcfe533a
commit
fa1bbbbe08
14 changed files with 254 additions and 124 deletions
|
@ -9,47 +9,38 @@ use tracing::debug;
|
|||
|
||||
use puffin_cache::{digest, CanonicalUrl};
|
||||
|
||||
use crate::git::{GitReference, GitRemote};
|
||||
use crate::git::GitRemote;
|
||||
use crate::{FetchStrategy, Git};
|
||||
|
||||
/// A remote Git source that can be checked out locally.
|
||||
pub struct GitSource {
|
||||
/// The git remote which we're going to fetch from.
|
||||
remote: GitRemote,
|
||||
/// The Git reference from the manifest file.
|
||||
manifest_reference: GitReference,
|
||||
/// The revision which a git source is locked to.
|
||||
/// This is expected to be set after the Git repository is fetched.
|
||||
locked_rev: Option<git2::Oid>,
|
||||
/// The identifier of this source for Cargo's Git cache directory.
|
||||
/// See [`ident`] for more.
|
||||
ident: String,
|
||||
git: Git,
|
||||
/// The HTTP client to use for fetching.
|
||||
client: Client,
|
||||
/// The fetch strategy to use when cloning.
|
||||
strategy: FetchStrategy,
|
||||
/// The path to the Git source database.
|
||||
git: PathBuf,
|
||||
cache: PathBuf,
|
||||
}
|
||||
|
||||
impl GitSource {
|
||||
pub fn new(reference: Git, git: PathBuf) -> Self {
|
||||
pub fn new(git: Git, cache: impl Into<PathBuf>) -> Self {
|
||||
Self {
|
||||
remote: GitRemote::new(&reference.url),
|
||||
manifest_reference: reference.reference,
|
||||
locked_rev: reference.precise,
|
||||
ident: digest(&CanonicalUrl::new(&reference.url)),
|
||||
git,
|
||||
client: Client::new(),
|
||||
strategy: FetchStrategy::Libgit2,
|
||||
git,
|
||||
cache: cache.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fetch(self) -> Result<PathBuf> {
|
||||
pub fn fetch(self) -> Result<Fetch> {
|
||||
// The path to the repo, within the Git database.
|
||||
let db_path = self.git.join("db").join(&self.ident);
|
||||
let ident = digest(&CanonicalUrl::new(&self.git.url));
|
||||
let db_path = self.cache.join("db").join(&ident);
|
||||
|
||||
let (db, actual_rev) = match (self.locked_rev, self.remote.db_at(&db_path).ok()) {
|
||||
let remote = GitRemote::new(&self.git.url);
|
||||
let (db, actual_rev) = match (self.git.precise, remote.db_at(&db_path).ok()) {
|
||||
// If we have a locked revision, and we have a preexisting database
|
||||
// which has that revision, then no update needs to happen.
|
||||
(Some(rev), Some(db)) if db.contains(rev) => (db, rev),
|
||||
|
@ -59,12 +50,12 @@ impl GitSource {
|
|||
// situation that we have a locked revision but the database
|
||||
// doesn't have it.
|
||||
(locked_rev, db) => {
|
||||
debug!("Updating Git source: `{:?}`", self.remote);
|
||||
debug!("Updating Git source: `{:?}`", remote);
|
||||
|
||||
self.remote.checkout(
|
||||
remote.checkout(
|
||||
&db_path,
|
||||
db,
|
||||
&self.manifest_reference,
|
||||
&self.git.reference,
|
||||
locked_rev,
|
||||
self.strategy,
|
||||
&self.client,
|
||||
|
@ -80,12 +71,34 @@ impl GitSource {
|
|||
// filesystem. This will use hard links and such to ideally make the
|
||||
// checkout operation here pretty fast.
|
||||
let checkout_path = self
|
||||
.git
|
||||
.cache
|
||||
.join("checkouts")
|
||||
.join(&self.ident)
|
||||
.join(&ident)
|
||||
.join(short_id.as_str());
|
||||
db.copy_to(actual_rev, &checkout_path, self.strategy, &self.client)?;
|
||||
|
||||
Ok(checkout_path)
|
||||
Ok(Fetch {
|
||||
git: self.git.with_precise(actual_rev),
|
||||
path: checkout_path,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// The outcome of a successful [`GitSource::fetch`].
///
/// Pairs the source's [`Git`] reference, after `with_precise` has been applied
/// with the revision that was actually checked out, with the directory that
/// revision was copied into. Both fields are private; callers extract them via
/// the `From<Fetch>` conversions into [`Git`] and [`PathBuf`].
pub struct Fetch {
|
||||
/// The [`Git`] reference that was fetched.
|
||||
git: Git,
|
||||
/// The path to the checked out repository.
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
/// Consumes a [`Fetch`] and yields only its [`Git`] reference; the checkout
/// path is dropped.
impl From<Fetch> for Git {
|
||||
fn from(fetch: Fetch) -> Self {
|
||||
fetch.git
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes a [`Fetch`] and yields only the path of the checked-out
/// repository; the [`Git`] reference is dropped.
impl From<Fetch> for PathBuf {
|
||||
fn from(fetch: Fetch) -> Self {
|
||||
fetch.path
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue