From 97c1877f6f0e9263802d33a2964ef46c825bc428 Mon Sep 17 00:00:00 2001 From: Sydney Duckworth <3812736+sydduckworth@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:48:06 -0500 Subject: [PATCH] Add Git LFS support to `uv-git` crate (#10335) ## Summary Closes #3312. This PR adds Git LFS support to the `uv-git` crate by using the `git-lfs` CLI to fetch required LFS objects for a revision following the call to `git fetch`. The LFS fetch step is disabled by default and only enabled if the environment variable `UV_GIT_LFS` is set. When enabled, the LFS fetch step is run for all repositories regardless of whether they have associated LFS objects. The step is skipped if the `git-lfs` CLI tool isn't installed. ## Test Plan I verified that the minimal example in the linked issue passes, i.e. this command now succeeds: ```sh UV_GIT_LFS=1 uv pip install git+https://github.com/grebnetiew/lfs-py.git ``` I also verified that non-LFS repositories still work, with or without `git-lfs` installed. ### To Replicate Attempt to use uv to install a Git dependency that contains LFS objects (e.g. `uv pip install git+https://github.com/grebnetiew/lfs-py.git`). This should fail with a smudge filter error. Re-run the same command with the added environment variable `UV_GIT_LFS=1`. The install should now succeed. ## Potential Changes / Improvements ~With this change LFS objects in a given revision will always be downloaded if the user has Git LFS installed, which may not always be desired behavior. It might be helpful to add a field to the `uv` settings and/or an environment variable so that the LFS step can be disabled if needed.~ Enabling/disabling via environment variable has now been implemented. 
--------- Co-authored-by: Sydney Duckworth Co-authored-by: Zanie Blue --- crates/uv-git/src/git.rs | 53 ++++++++++++++++++++++++++++++- crates/uv-static/src/env_vars.rs | 3 ++ docs/configuration/environment.md | 4 +++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/crates/uv-git/src/git.rs b/crates/uv-git/src/git.rs index d08cecd95..ca9c82fe3 100644 --- a/crates/uv-git/src/git.rs +++ b/crates/uv-git/src/git.rs @@ -1,6 +1,7 @@ //! Git support is derived from Cargo's implementation. //! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice. //! Source: +use std::env; use std::fmt::Display; use std::path::{Path, PathBuf}; use std::str::{self, FromStr}; @@ -13,7 +14,7 @@ use cargo_util::{paths, ProcessBuilder}; use reqwest::StatusCode; use reqwest_middleware::ClientWithMiddleware; -use tracing::debug; +use tracing::{debug, warn}; use url::Url; use uv_fs::Simplified; use uv_static::EnvVars; @@ -251,6 +252,8 @@ impl GitRemote { ) -> Result<(GitDatabase, GitOid)> { let locked_ref = locked_rev.map(|oid| GitReference::FullCommit(oid.to_string())); let reference = locked_ref.as_ref().unwrap_or(reference); + let enable_lfs_fetch = env::var(EnvVars::UV_GIT_LFS).is_ok(); + if let Some(mut db) = db { fetch(&mut db.repo, self.url.as_str(), reference, client) .with_context(|| format!("failed to fetch into: {}", into.user_display()))?; @@ -261,6 +264,10 @@ impl GitRemote { }; if let Some(rev) = resolved_commit_hash { + if enable_lfs_fetch { + fetch_lfs(&mut db.repo, self.url.as_str(), &rev) + .with_context(|| format!("failed to fetch LFS objects at {rev}"))?; + } return Ok((db, rev)); } } @@ -280,6 +287,10 @@ impl GitRemote { Some(rev) => rev, None => reference.resolve(&repo)?, }; + if enable_lfs_fetch { + fetch_lfs(&mut repo, self.url.as_str(), &rev) + .with_context(|| format!("failed to fetch LFS objects at {rev}"))?; + } Ok((GitDatabase { repo }, rev)) } @@ -635,6 +646,46 @@ fn fetch_with_cli( // The required `on...line` callbacks currently do 
nothing. // The output appears to be included in error messages by default. cmd.exec_with_output()?; + + Ok(()) +} + +/// A global cache of the `git lfs` command. +/// +/// Returns an error if Git LFS isn't available. +/// Caching the command allows us to only check if LFS is installed once. +static GIT_LFS: LazyLock<Result<ProcessBuilder>> = LazyLock::new(|| { + let mut cmd = ProcessBuilder::new(GIT.as_ref()?); + cmd.arg("lfs"); + + // Run a simple command to verify LFS is installed + cmd.clone().arg("version").exec_with_output()?; + Ok(cmd) +}); + +/// Attempts to use `git-lfs` CLI to fetch required LFS objects for a given revision. +fn fetch_lfs(repo: &mut GitRepository, url: &str, revision: &GitOid) -> Result<()> { + let mut cmd = if let Ok(lfs) = GIT_LFS.as_ref() { + debug!("Fetching Git LFS objects"); + lfs.clone() + } else { + // Since this feature is opt-in, warn if not available + warn!("Git LFS is not available, skipping LFS fetch"); + return Ok(()); + }; + + cmd.arg("fetch") + .arg(url) + .arg(revision.as_str()) + // These variables are unset for the same reason as in `fetch_with_cli`. + .env_remove(EnvVars::GIT_DIR) + .env_remove(EnvVars::GIT_WORK_TREE) + .env_remove(EnvVars::GIT_INDEX_FILE) + .env_remove(EnvVars::GIT_OBJECT_DIRECTORY) + .env_remove(EnvVars::GIT_ALTERNATE_OBJECT_DIRECTORIES) + .cwd(&repo.path); + + cmd.exec_with_output()?; Ok(()) } diff --git a/crates/uv-static/src/env_vars.rs b/crates/uv-static/src/env_vars.rs index 7f6948967..ebc60db4e 100644 --- a/crates/uv-static/src/env_vars.rs +++ b/crates/uv-static/src/env_vars.rs @@ -575,4 +575,7 @@ impl EnvVars { /// Skip writing `uv` installer metadata files (e.g., `INSTALLER`, `REQUESTED`, and `direct_url.json`) to site-packages `.dist-info` directories. pub const UV_NO_INSTALLER_METADATA: &'static str = "UV_NO_INSTALLER_METADATA"; + + /// Enables fetching files stored in Git LFS when installing a package from a Git repository. 
+ pub const UV_GIT_LFS: &'static str = "UV_GIT_LFS"; } diff --git a/docs/configuration/environment.md b/docs/configuration/environment.md index 6f9e31988..af640be28 100644 --- a/docs/configuration/environment.md +++ b/docs/configuration/environment.md @@ -96,6 +96,10 @@ updating the `uv.lock` file. Equivalent to the `--token` argument for self update. A GitHub token for authentication. +### `UV_GIT_LFS` + +Enables fetching files stored in Git LFS when installing a package from a Git repository. + ### `UV_HTTP_TIMEOUT` Timeout (in seconds) for HTTP requests. (default: 30 s)