Use git command to fetch repositories instead of libgit2 for robust SSH support (#1781)

Closes https://github.com/astral-sh/uv/issues/1775
Closes https://github.com/astral-sh/uv/issues/1452
Closes https://github.com/astral-sh/uv/issues/1514
Follows https://github.com/astral-sh/uv/pull/1717

libgit2 does not support host names with extra identifiers during SSH
lookup (e.g. [`github.com-some_identifier`](

https://docs.github.com/en/authentication/connecting-to-github-with-ssh/managing-deploy-keys#using-multiple-repositories-on-one-server))
so we use the `git` command instead for fetching. This is required for
`pip` parity.

See the [Cargo
documentation](https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli)
for more details on using the `git` CLI instead of libgit2. We may want
to try to use libgit2 first in the future, as it is more performant
(#1786).

We now support authentication with:

```
git+ssh://git@<hostname>/...
git+ssh://git@<hostname>-<identifier>/...
```

Tested with a deploy key e.g.

```
cargo run -- \
    pip install uv-private-pypackage@git+ssh://git@github.com-test-uv-private-pypackage/astral-test/uv-private-pypackage.git \
    --reinstall --no-cache -v
```

and

```
cargo run -- \
    pip install uv-private-pypackage@git+ssh://git@github.com/astral-test/uv-private-pypackage.git \
    --reinstall --no-cache -v     
```

with a ssh config like

```
Host github.com
        Hostname github.com
        IdentityFile=/Users/mz/.ssh/id_ed25519

Host github.com-test-uv-private-pypackage
        Hostname github.com
        IdentityFile=/Users/mz/.ssh/id_ed25519
```

It seems quite hard to add test coverage for this to the test suite, as
we'd need to add the SSH key and I don't know how to isolate that from
affecting other developer's machines.
This commit is contained in:
Zanie Blue 2024-02-21 12:44:32 -06:00 committed by GitHub
parent b4bc40627c
commit 71ec568d0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 179 additions and 12 deletions

View file

@ -11,7 +11,7 @@ use cargo_util::{paths, ProcessBuilder};
use git2::{self, ErrorClass, ObjectType};
use reqwest::Client;
use reqwest::StatusCode;
use tracing::{debug, warn};
use tracing::{debug, error, warn};
use url::Url;
use uv_fs::Normalized;
@ -43,6 +43,14 @@ pub(crate) enum GitReference {
DefaultBranch,
}
/// Strategy when fetching refspecs for a [`GitReference`]
enum RefspecStrategy {
// All refspecs should be fetched, if any fail then the fetch will fail
All,
// Stop after the first successful fetch, if none suceed then the fetch will fail
First,
}
impl GitReference {
pub(crate) fn from_rev(rev: &str) -> Self {
if rev.starts_with("refs/") {
@ -908,6 +916,7 @@ pub(crate) fn fetch(
// which need to get fetched. Additionally record if we're fetching tags.
let mut refspecs = Vec::new();
let mut tags = false;
let mut refspec_strategy = RefspecStrategy::All;
// The `+` symbol on the refspec means to allow a forced (fast-forward)
// update which is needed if there is ever a force push that requires a
// fast-forward.
@ -929,6 +938,7 @@ pub(crate) fn fetch(
refspecs.push(format!(
"+refs/tags/{branch_or_tag}:refs/remotes/origin/tags/{branch_or_tag}"
));
refspec_strategy = RefspecStrategy::First;
}
GitReference::DefaultBranch => {
@ -969,13 +979,49 @@ pub(crate) fn fetch(
debug!("Performing a Git fetch for: {remote_url}");
match strategy {
FetchStrategy::Cli => fetch_with_cli(repo, remote_url, &refspecs, tags),
FetchStrategy::Cli => {
match refspec_strategy {
RefspecStrategy::All => fetch_with_cli(repo, remote_url, refspecs.as_slice(), tags),
RefspecStrategy::First => {
let num_refspecs = refspecs.len();
// Try each refspec
let errors = refspecs
.into_iter()
.map(|refspec| {
(
refspec.clone(),
fetch_with_cli(repo, remote_url, &[refspec], tags),
)
})
// Stop after the first success
.take_while(|(_, result)| result.is_err())
.collect::<Vec<_>>();
if errors.len() == num_refspecs {
// If all of the fetches failed, report to the user
for (refspec, err) in errors {
if let Err(err) = err {
error!("failed to fetch refspec `{refspec}`: {err}");
}
}
Err(anyhow!("failed to fetch all refspecs"))
} else {
Ok(())
}
}
}
}
FetchStrategy::Libgit2 => {
// Libgit2 does not fail if a refspec is missing, so the `refspec_strategy`
// is not handled here
let git_config = git2::Config::open_default()?;
with_fetch_options(&git_config, remote_url, &mut |mut opts| {
if tags {
opts.download_tags(git2::AutotagOption::All);
}
// The `fetch` operation here may fail spuriously due to a corrupt
// repository. It could also fail, however, for a whole slew of other
// reasons (aka network related reasons). We want Cargo to automatically
@ -1057,7 +1103,11 @@ fn fetch_with_cli(
.env_remove("GIT_OBJECT_DIRECTORY")
.env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
.cwd(repo.path());
cmd.exec()?;
// We capture the output to avoid streaming it to the user's console during clones.
// The required `on...line` callbacks currently do nothing.
// The output appears to be included in error messages by default.
cmd.exec_with_streaming(&mut |_| Ok(()), &mut |_| Ok(()), true)?;
Ok(())
}

View file

@ -33,7 +33,7 @@ impl GitSource {
Self {
git,
client: Client::new(),
strategy: FetchStrategy::Libgit2,
strategy: FetchStrategy::Cli,
cache: cache.into(),
reporter: None,
}