Use shared client in Git fetch implementation (#4487)

## Summary

It turns out that the Git fetch implementation is initializing its own
client, which can be really expensive on macOS (due to loading native
certificates) _and_ bypasses any of our middleware. This PR modifies the
Git implementation to accept a shared client.
This commit is contained in:
Charlie Marsh 2024-06-25 00:09:29 +03:00 committed by GitHub
parent 7221514136
commit 9905521957
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 39 additions and 21 deletions

1
Cargo.lock generated
View file

@ -4829,6 +4829,7 @@ dependencies = [
"dashmap", "dashmap",
"fs-err", "fs-err",
"reqwest", "reqwest",
"reqwest-middleware",
"thiserror", "thiserror",
"tokio", "tokio",
"tracing", "tracing",

View file

@ -182,7 +182,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await? .await?
} }
BuildableSource::Dist(SourceDist::Git(dist)) => { BuildableSource::Dist(SourceDist::Git(dist)) => {
self.git(source, &GitSourceUrl::from(dist), tags, hashes) self.git(source, &GitSourceUrl::from(dist), tags, hashes, client)
.boxed_local() .boxed_local()
.await? .await?
} }
@ -234,7 +234,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await? .await?
} }
BuildableSource::Url(SourceUrl::Git(resource)) => { BuildableSource::Url(SourceUrl::Git(resource)) => {
self.git(source, resource, tags, hashes) self.git(source, resource, tags, hashes, client)
.boxed_local() .boxed_local()
.await? .await?
} }
@ -356,7 +356,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await? .await?
} }
BuildableSource::Dist(SourceDist::Git(dist)) => { BuildableSource::Dist(SourceDist::Git(dist)) => {
self.git_metadata(source, &GitSourceUrl::from(dist), hashes) self.git_metadata(source, &GitSourceUrl::from(dist), hashes, client)
.boxed_local() .boxed_local()
.await? .await?
} }
@ -401,7 +401,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await? .await?
} }
BuildableSource::Url(SourceUrl::Git(resource)) => { BuildableSource::Url(SourceUrl::Git(resource)) => {
self.git_metadata(source, resource, hashes) self.git_metadata(source, resource, hashes, client)
.boxed_local() .boxed_local()
.await? .await?
} }
@ -1089,6 +1089,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
resource: &GitSourceUrl<'_>, resource: &GitSourceUrl<'_>,
tags: &Tags, tags: &Tags,
hashes: HashPolicy<'_>, hashes: HashPolicy<'_>,
client: &ManagedClient<'_>,
) -> Result<BuiltWheelMetadata, Error> { ) -> Result<BuiltWheelMetadata, Error> {
// Before running the build, check that the hashes match. // Before running the build, check that the hashes match.
if hashes.is_validate() { if hashes.is_validate() {
@ -1101,6 +1102,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git() .git()
.resolve( .resolve(
resource.git, resource.git,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git), self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from), self.reporter.clone().map(Facade::from),
) )
@ -1117,6 +1119,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git() .git()
.fetch( .fetch(
&url, &url,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git), self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from), self.reporter.clone().map(Facade::from),
) )
@ -1173,6 +1176,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
source: &BuildableSource<'_>, source: &BuildableSource<'_>,
resource: &GitSourceUrl<'_>, resource: &GitSourceUrl<'_>,
hashes: HashPolicy<'_>, hashes: HashPolicy<'_>,
client: &ManagedClient<'_>,
) -> Result<ArchiveMetadata, Error> { ) -> Result<ArchiveMetadata, Error> {
// Before running the build, check that the hashes match. // Before running the build, check that the hashes match.
if hashes.is_validate() { if hashes.is_validate() {
@ -1185,6 +1189,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git() .git()
.resolve( .resolve(
resource.git, resource.git,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git), self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from), self.reporter.clone().map(Facade::from),
) )
@ -1201,6 +1206,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.git() .git()
.fetch( .fetch(
&url, &url,
client.unmanaged.uncached_client().client(),
self.build_context.cache().bucket(CacheBucket::Git), self.build_context.cache().bucket(CacheBucket::Git),
self.reporter.clone().map(Facade::from), self.reporter.clone().map(Facade::from),
) )

View file

@ -21,6 +21,7 @@ cargo-util = { workspace = true }
dashmap = { workspace = true } dashmap = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] }
reqwest = { workspace = true, features = ["blocking"] } reqwest = { workspace = true, features = ["blocking"] }
reqwest-middleware = { workspace = true }
thiserror = { workspace = true } thiserror = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }

View file

@ -7,8 +7,8 @@ use std::str::{self};
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use cargo_util::{paths, ProcessBuilder}; use cargo_util::{paths, ProcessBuilder};
use reqwest::Client;
use reqwest::StatusCode; use reqwest::StatusCode;
use reqwest_middleware::ClientWithMiddleware;
use tracing::debug; use tracing::debug;
use url::Url; use url::Url;
use uv_fs::Simplified; use uv_fs::Simplified;
@ -218,7 +218,7 @@ impl GitRemote {
db: Option<GitDatabase>, db: Option<GitDatabase>,
reference: &GitReference, reference: &GitReference,
locked_rev: Option<GitOid>, locked_rev: Option<GitOid>,
client: &Client, client: &ClientWithMiddleware,
) -> Result<(GitDatabase, GitOid)> { ) -> Result<(GitDatabase, GitOid)> {
let locked_ref = locked_rev.map(|oid| GitReference::FullCommit(oid.to_string())); let locked_ref = locked_rev.map(|oid| GitReference::FullCommit(oid.to_string()));
let reference = locked_ref.as_ref().unwrap_or(reference); let reference = locked_ref.as_ref().unwrap_or(reference);
@ -344,13 +344,13 @@ impl GitCheckout {
/// is done. Use [`GitCheckout::is_fresh`] to check. /// is done. Use [`GitCheckout::is_fresh`] to check.
/// ///
/// * The `repo` will be the checked out Git repository. /// * The `repo` will be the checked out Git repository.
fn new(revision: GitOid, repo: GitRepository) -> GitCheckout { fn new(revision: GitOid, repo: GitRepository) -> Self {
GitCheckout { revision, repo } Self { revision, repo }
} }
/// Clone a repo for a `revision` into a local path from a `database`. /// Clone a repo for a `revision` into a local path from a `database`.
/// This is a filesystem-to-filesystem clone. /// This is a filesystem-to-filesystem clone.
fn clone_into(into: &Path, database: &GitDatabase, revision: GitOid) -> Result<GitCheckout> { fn clone_into(into: &Path, database: &GitDatabase, revision: GitOid) -> Result<Self> {
let dirname = into.parent().unwrap(); let dirname = into.parent().unwrap();
paths::create_dir_all(dirname)?; paths::create_dir_all(dirname)?;
if into.exists() { if into.exists() {
@ -440,7 +440,7 @@ pub(crate) fn fetch(
repo: &mut GitRepository, repo: &mut GitRepository,
remote_url: &str, remote_url: &str,
reference: &GitReference, reference: &GitReference,
client: &Client, client: &ClientWithMiddleware,
) -> Result<()> { ) -> Result<()> {
let oid_to_fetch = match github_fast_path(repo, remote_url, reference, client) { let oid_to_fetch = match github_fast_path(repo, remote_url, reference, client) {
Ok(FastPathRev::UpToDate) => return Ok(()), Ok(FastPathRev::UpToDate) => return Ok(()),
@ -639,7 +639,7 @@ fn github_fast_path(
repo: &mut GitRepository, repo: &mut GitRepository,
url: &str, url: &str,
reference: &GitReference, reference: &GitReference,
client: &Client, client: &ClientWithMiddleware,
) -> Result<FastPathRev> { ) -> Result<FastPathRev> {
let url = Url::parse(url)?; let url = Url::parse(url)?;
if !is_github(&url) { if !is_github(&url) {

View file

@ -7,6 +7,7 @@ use cache_key::RepositoryUrl;
use dashmap::mapref::one::Ref; use dashmap::mapref::one::Ref;
use dashmap::DashMap; use dashmap::DashMap;
use fs_err::tokio as fs; use fs_err::tokio as fs;
use reqwest_middleware::ClientWithMiddleware;
use uv_fs::LockedFile; use uv_fs::LockedFile;
use crate::{Fetch, GitReference, GitSha, GitSource, GitUrl, Reporter}; use crate::{Fetch, GitReference, GitSha, GitSource, GitUrl, Reporter};
@ -51,6 +52,7 @@ impl GitResolver {
pub async fn fetch( pub async fn fetch(
&self, &self,
url: &GitUrl, url: &GitUrl,
client: ClientWithMiddleware,
cache: PathBuf, cache: PathBuf,
reporter: Option<impl Reporter + 'static>, reporter: Option<impl Reporter + 'static>,
) -> Result<Fetch, GitResolverError> { ) -> Result<Fetch, GitResolverError> {
@ -67,9 +69,9 @@ impl GitResolver {
// Fetch the Git repository. // Fetch the Git repository.
let source = if let Some(reporter) = reporter { let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), cache).with_reporter(reporter) GitSource::new(url.clone(), client, cache).with_reporter(reporter)
} else { } else {
GitSource::new(url.clone(), cache) GitSource::new(url.clone(), client, cache)
}; };
let fetch = tokio::task::spawn_blocking(move || source.fetch()) let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await? .await?
@ -89,7 +91,8 @@ impl GitResolver {
pub async fn resolve( pub async fn resolve(
&self, &self,
url: &GitUrl, url: &GitUrl,
cache: impl Into<PathBuf>, client: ClientWithMiddleware,
cache: PathBuf,
reporter: Option<impl Reporter + 'static>, reporter: Option<impl Reporter + 'static>,
) -> Result<Option<GitUrl>, GitResolverError> { ) -> Result<Option<GitUrl>, GitResolverError> {
// If the Git reference already contains a complete SHA, short-circuit. // If the Git reference already contains a complete SHA, short-circuit.
@ -108,9 +111,9 @@ impl GitResolver {
// Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial // Fetch the precise SHA of the Git reference (which could be a branch, a tag, a partial
// commit, etc.). // commit, etc.).
let source = if let Some(reporter) = reporter { let source = if let Some(reporter) = reporter {
GitSource::new(url.clone(), cache).with_reporter(reporter) GitSource::new(url.clone(), client, cache).with_reporter(reporter)
} else { } else {
GitSource::new(url.clone(), cache) GitSource::new(url.clone(), client, cache)
}; };
let fetch = tokio::task::spawn_blocking(move || source.fetch()) let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await? .await?

View file

@ -4,7 +4,7 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use anyhow::Result; use anyhow::Result;
use reqwest::Client; use reqwest_middleware::ClientWithMiddleware;
use tracing::{debug, instrument}; use tracing::{debug, instrument};
use url::Url; use url::Url;
@ -18,7 +18,7 @@ pub struct GitSource {
/// The Git reference from the manifest file. /// The Git reference from the manifest file.
git: GitUrl, git: GitUrl,
/// The HTTP client to use for fetching. /// The HTTP client to use for fetching.
client: Client, client: ClientWithMiddleware,
/// The path to the Git source database. /// The path to the Git source database.
cache: PathBuf, cache: PathBuf,
/// The reporter to use for this source. /// The reporter to use for this source.
@ -27,10 +27,14 @@ pub struct GitSource {
impl GitSource { impl GitSource {
/// Initialize a new Git source. /// Initialize a new Git source.
pub fn new(git: GitUrl, cache: impl Into<PathBuf>) -> Self { pub fn new(
git: GitUrl,
client: impl Into<ClientWithMiddleware>,
cache: impl Into<PathBuf>,
) -> Self {
Self { Self {
git, git,
client: Client::new(), client: client.into(),
cache: cache.into(), cache: cache.into(),
reporter: None, reporter: None,
} }
@ -56,7 +60,10 @@ impl GitSource {
let (db, actual_rev, task) = match (self.git.precise, remote.db_at(&db_path).ok()) { let (db, actual_rev, task) = match (self.git.precise, remote.db_at(&db_path).ok()) {
// If we have a locked revision, and we have a preexisting database // If we have a locked revision, and we have a preexisting database
// which has that revision, then no update needs to happen. // which has that revision, then no update needs to happen.
(Some(rev), Some(db)) if db.contains(rev.into()) => (db, rev, None), (Some(rev), Some(db)) if db.contains(rev.into()) => {
debug!("Using existing git source `{:?}`", self.git.repository);
(db, rev, None)
}
// ... otherwise we use this state to update the git database. Note // ... otherwise we use this state to update the git database. Note
// that we still check for being offline here, for example in the // that we still check for being offline here, for example in the