Use locks to prevent concurrent accesses to the same Git repo (#304)

Ensures that if we need to access the same Git repo twice in a
resolution, we only hold one handle on that repo at a time. (Otherwise,
`git2` panics.)
Charlie Marsh 2023-11-03 09:33:14 -07:00 committed by GitHub
parent fa1bbbbe08
commit aa9882eee8
4 changed files with 115 additions and 1 deletion
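
The core of the change is a small map from canonical repository URL to a per-URL `tokio::sync::Mutex`: before fetching or building anything from a Git URL, the resolver looks up (or creates) the mutex for that URL and holds it for the duration of the operation, so two requests against the same repo serialize instead of opening it concurrently. Below is a minimal, self-contained sketch of that pattern; the `Locks` shape mirrors the one added to the resolver in this diff, while the string key type, the spawned tasks, and the repo URL are illustrative only.

use std::collections::HashMap;
use std::sync::Arc;

use tokio::sync::Mutex;

/// One async mutex per key, handed out behind an `Arc` so callers can hold
/// the guard across `.await` points.
#[derive(Default)]
struct Locks(Mutex<HashMap<String, Arc<Mutex<()>>>>);

impl Locks {
    async fn acquire(&self, key: &str) -> Arc<Mutex<()>> {
        let mut map = self.0.lock().await;
        map.entry(key.to_string())
            .or_insert_with(|| Arc::new(Mutex::new(())))
            .clone()
    }
}

#[tokio::main]
async fn main() {
    let locks = Arc::new(Locks::default());
    let repo = "https://github.com/pallets/flask.git";

    // Both tasks target the same repo; whichever acquires the inner mutex
    // first forces the other to wait, so the repo is never opened twice at once.
    let (a, b) = (Arc::clone(&locks), Arc::clone(&locks));
    let first = tokio::spawn(async move {
        let lock = a.acquire(repo).await;
        let _guard = lock.lock().await;
        // ... open the repo, fetch, check out the first ref ...
    });
    let second = tokio::spawn(async move {
        let lock = b.acquire(repo).await;
        let _guard = lock.lock().await;
        // ... runs only after the first guard is dropped ...
    });
    let _ = tokio::join!(first, second);
}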


@@ -23,6 +23,16 @@ impl CanonicalUrl {
            url.path_segments_mut().unwrap().pop_if_empty();
        }

        // If a URL starts with a kind (like `git+`), remove it.
        if let Some(suffix) = url.as_str().strip_prefix("git+") {
            // If a Git URL ends in a reference (like a branch, tag, or commit), remove it.
            if let Some((prefix, _)) = suffix.rsplit_once('@') {
                url = prefix.parse().unwrap();
            } else {
                url = suffix.parse().unwrap();
            }
        }

        // For GitHub URLs specifically, just lower-case everything. GitHub
        // treats both the same, but they hash differently, and we're gonna be
        // hashing them. This wants a more general solution, and also we're
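
For reference, the normalization above is what makes the locking work for the test added below: `git+https://github.com/pallets/flask.git@2.0.0` and `git+https://github.com/pallets/flask.git@3.0.0` collapse to the same canonical URL, so they hash to the same lock key. A standalone sketch of just that stripping step (the helper name is mine; the real `CanonicalUrl` additionally lower-cases GitHub URLs and is hashed via `puffin_cache::digest`):

use url::Url;

/// Strip the `git+` kind and any trailing `@<ref>`, mirroring the hunk above.
fn strip_git_ref(url: &Url) -> Url {
    if let Some(suffix) = url.as_str().strip_prefix("git+") {
        if let Some((prefix, _)) = suffix.rsplit_once('@') {
            return prefix.parse().unwrap();
        }
        return suffix.parse().unwrap();
    }
    url.clone()
}

fn main() {
    let flask_2 = Url::parse("git+https://github.com/pallets/flask.git@2.0.0").unwrap();
    let flask_3 = Url::parse("git+https://github.com/pallets/flask.git@3.0.0").unwrap();
    // Different refs, same repository: both normalize to the same URL,
    // and therefore to the same lock.
    assert_eq!(strip_git_ref(&flask_2), strip_git_ref(&flask_3));
}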


@@ -804,6 +804,43 @@ fn compile_git_refs_https_dependency() -> Result<()> {
    Ok(())
}

/// Resolve two packages from a `requirements.in` file with the same Git HTTPS dependency.
#[test]
fn compile_git_concurrent_access() -> Result<()> {
    let temp_dir = assert_fs::TempDir::new()?;
    let cache_dir = assert_fs::TempDir::new()?;
    let venv = temp_dir.child(".venv");

    Command::new(get_cargo_bin(BIN_NAME))
        .arg("venv")
        .arg(venv.as_os_str())
        .arg("--cache-dir")
        .arg(cache_dir.path())
        .current_dir(&temp_dir)
        .assert()
        .success();
    venv.assert(predicates::path::is_dir());

    let requirements_in = temp_dir.child("requirements.in");
    requirements_in.touch()?;
    requirements_in
        .write_str("flask @ git+https://github.com/pallets/flask.git@2.0.0\ndask @ git+https://github.com/pallets/flask.git@3.0.0")?;

    insta::with_settings!({
        filters => INSTA_FILTERS.to_vec()
    }, {
        assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
            .arg("pip-compile")
            .arg("requirements.in")
            .arg("--cache-dir")
            .arg(cache_dir.path())
            .env("VIRTUAL_ENV", venv.as_os_str())
            .current_dir(&temp_dir));
    });

    Ok(())
}

/// Request Flask, but include a URL dependency for Werkzeug, which should avoid adding a
/// duplicate dependency from `PyPI`.
#[test]


@@ -0,0 +1,43 @@
---
source: crates/puffin-cli/tests/pip_compile.rs
info:
  program: puffin
  args:
    - pip-compile
    - requirements.in
    - "--cache-dir"
    - /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp6j5dBk
  env:
    VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpYyHlPp/.venv
---
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# [BIN_PATH] pip-compile requirements.in --cache-dir [CACHE_DIR]
click==8.1.7
    # via
    #   dask
    #   flask
dask @ git+https://github.com/pallets/flask.git@735a4701d6d5e848241e7d7535db898efb62d400
flask @ git+https://github.com/pallets/flask.git@2f0c62f5e6e290843f03c1fa70817c7a3c7fd661
itsdangerous==2.1.2
    # via
    #   dask
    #   flask
jinja2==3.1.2
    # via
    #   dask
    #   flask
markupsafe==2.1.3
    # via
    #   jinja2
    #   werkzeug
werkzeug==3.0.1
    # via
    #   dask
    #   flask

----- stderr -----
Resolved 7 packages in [TIME]


@@ -14,6 +14,7 @@ use pubgrub::range::Range;
use pubgrub::solver::{Incompatibility, State};
use pubgrub::type_aliases::DependencyConstraints;
use tokio::select;
use tokio::sync::Mutex;
use tracing::{debug, error, trace};
use url::Url;
use waitmap::WaitMap;
@@ -21,6 +22,7 @@ use waitmap::WaitMap;
use distribution_filename::{SourceDistributionFilename, WheelFilename};
use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags;
use puffin_cache::CanonicalUrl;
use puffin_client::RegistryClient;
use puffin_distribution::{RemoteDistributionRef, VersionOrUrl};
use puffin_normalize::{ExtraName, PackageName};
@@ -45,6 +47,7 @@ pub struct Resolver<'a, Context: BuildContext + Sync> {
    client: &'a RegistryClient,
    selector: CandidateSelector,
    index: Arc<Index>,
    locks: Arc<Locks>,
    build_context: &'a Context,
    reporter: Option<Box<dyn Reporter>>,
}
@@ -59,8 +62,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
        build_context: &'a Context,
    ) -> Self {
        Self {
            selector: CandidateSelector::from(&manifest),
            index: Arc::new(Index::default()),
            locks: Arc::new(Locks::default()),
            selector: CandidateSelector::from(&manifest),
            project: manifest.project,
            requirements: manifest.requirements,
            constraints: manifest.constraints,
@@ -697,6 +701,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
            }
            // Build a source distribution from a remote URL, returning its metadata.
            Request::SdistUrl(package_name, url) => {
                let lock = self.locks.acquire(&url).await;
                let _guard = lock.lock().await;

                let fetcher = SourceDistributionFetcher::new(self.build_context);
                let precise =
                    fetcher
@@ -744,6 +751,9 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
            }
            // Fetch wheel metadata from a remote URL.
            Request::WheelUrl(package_name, url) => {
                let lock = self.locks.acquire(&url).await;
                let _guard = lock.lock().await;

                let fetcher = WheelFetcher::new(self.build_context.cache());
                let distribution = RemoteDistributionRef::from_url(&package_name, &url);
                let metadata = match fetcher.find_dist_info(&distribution, self.tags) {
@@ -867,6 +877,20 @@ impl InFlight {
    }
}

/// A set of locks used to prevent concurrent access to the same resource.
#[derive(Debug, Default)]
struct Locks(Mutex<FxHashMap<String, Arc<Mutex<()>>>>);

impl Locks {
    /// Acquire a lock on the given resource.
    async fn acquire(&self, url: &Url) -> Arc<Mutex<()>> {
        let mut map = self.0.lock().await;
        map.entry(puffin_cache::digest(&CanonicalUrl::new(url)))
            .or_insert_with(|| Arc::new(Mutex::new(())))
            .clone()
    }
}

/// In-memory index of package metadata.
struct Index {
    /// A map from package name to the metadata for that package.