Apply advisory locks when building source distributions (#3525)

## Summary

I don't love this, but it turns out that setuptools is not robust to
parallel builds: https://github.com/pypa/setuptools/issues/3119. As a
result, if you run uv from multiple processes, and they each attempt to
build the same source distribution, you can hit failures.

This PR applies an advisory lock to the source distribution directory.
We apply it unconditionally, even if we ultimately find something in the
cache and _don't_ do a build, which helps ensure that we only build the
distribution once (and wait for that build to complete) rather than
kicking off builds from each thread.
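
For illustration, the pattern is roughly the sketch below: create the shard directory, take an exclusive lock on a `.lock` file inside it, and hold the lock across the cache check and (if needed) the build. This sketch uses the `fs2` crate and a hypothetical `lock_dir` helper purely to show the shape of the idea; the actual change goes through uv's internal `LockedFile` type (see `lock_shard` in the diff below). Because the lock is advisory, it only coordinates processes that take the same lock, i.e. other uv invocations.

```rust
// Illustrative sketch only (not uv's implementation): advisory locking on a
// cache-shard directory via the `fs2` crate.
use std::fs::{self, File};
use std::io;
use std::path::Path;

use fs2::FileExt;

/// Take an exclusive advisory lock on `<dir>/.lock`, blocking until any other
/// process that holds it lets go. The lock is released when the returned
/// `File` is dropped (or the process exits).
fn lock_dir(dir: &Path) -> io::Result<File> {
    fs::create_dir_all(dir)?;
    let file = File::create(dir.join(".lock"))?;
    file.lock_exclusive()?;
    Ok(file)
}

fn main() -> io::Result<()> {
    // Hypothetical shard path, for illustration.
    let shard = Path::new("/tmp/example-cache/shard");
    let _guard = lock_dir(shard)?;

    // With the lock held: check the cache first, and only build the source
    // distribution if nothing usable is there.

    Ok(()) // Dropping `_guard` releases the lock.
}
```

In the PR itself the lock is acquired inside a `lock_shard` helper, moved off the async executor with `tokio::task::spawn_blocking`, since acquiring the lock can block.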

Closes https://github.com/astral-sh/uv/issues/3512.

## Test Plan

Ran:

```sh
#!/bin/bash
# Create a fresh venv and install a source-only distribution into it.
make_venv(){
    target/debug/uv venv "$1"
    source "$1/bin/activate"
    target/debug/uv pip install opentracing --no-deps --verbose
}

# Kick off eight installs in parallel, all building the same source distribution.
for i in {1..8}
do
   make_venv "./$1/$i" &
done

# Wait for all background installs to finish.
wait
```
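
(The script takes a scratch directory as its first argument and spawns eight parallel installs, each of which needs the same `opentracing` source distribution built; before this change, some of those builds could fail.)
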
Charlie Marsh authored 2024-05-13 10:42:20 -04:00 · committed by GitHub
commit 9a92a3ad37 (parent 42c3bfa351)
3 changed files with 50 additions and 26 deletions

@@ -11,6 +11,7 @@ use rustc_hash::FxHashSet;
 use tempfile::{tempdir, TempDir};
 use tracing::debug;

+pub use archive::ArchiveId;
 use distribution_types::InstalledDist;
 use pypi_types::Metadata23;
 use uv_fs::{cachedir, directories};
@@ -23,7 +24,6 @@ use crate::removal::{rm_rf, Removal};
 pub use crate::timestamp::Timestamp;
 pub use crate::wheel::WheelCache;
 use crate::wheel::WheelCacheKind;
-pub use archive::ArchiveId;

 mod archive;
 mod by_timestamp;

@@ -32,7 +32,7 @@ use uv_client::{
 };
 use uv_configuration::{BuildKind, NoBuild};
 use uv_extract::hash::Hasher;
-use uv_fs::write_atomic;
+use uv_fs::{write_atomic, LockedFile};
 use uv_types::{BuildContext, SourceBuildTrait};

 use crate::distribution_database::ManagedClient;
@@ -396,6 +396,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         hashes: HashPolicy<'_>,
         client: &ManagedClient<'_>,
     ) -> Result<BuiltWheelMetadata, Error> {
+        let _lock = lock_shard(cache_shard).await?;
+
         // Fetch the revision for the source distribution.
         let revision = self
             .url_revision(source, filename, url, cache_shard, hashes, client)
@@ -465,6 +467,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         hashes: HashPolicy<'_>,
         client: &ManagedClient<'_>,
     ) -> Result<ArchiveMetadata, Error> {
+        let _lock = lock_shard(cache_shard).await?;
+
         // Fetch the revision for the source distribution.
         let revision = self
             .url_revision(source, filename, url, cache_shard, hashes, client)
@@ -503,11 +507,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .await?
         {
             // Store the metadata.
-            let cache_entry = cache_shard.entry(METADATA);
-            fs::create_dir_all(cache_entry.dir())
+            fs::create_dir_all(metadata_entry.dir())
                 .await
                 .map_err(Error::CacheWrite)?;
-            write_atomic(cache_entry.path(), rmp_serde::to_vec(&metadata)?)
+            write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
                 .await
                 .map_err(Error::CacheWrite)?;
@@ -528,8 +531,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .await?;

         // Store the metadata.
-        let cache_entry = cache_shard.entry(METADATA);
-        write_atomic(cache_entry.path(), rmp_serde::to_vec(&metadata)?)
+        write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
             .await
             .map_err(Error::CacheWrite)?;
@@ -625,6 +627,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         tags: &Tags,
         hashes: HashPolicy<'_>,
     ) -> Result<BuiltWheelMetadata, Error> {
+        let _lock = lock_shard(cache_shard).await?;
+
         // Fetch the revision for the source distribution.
         let revision = self
             .archive_revision(source, resource, cache_shard, hashes)
@@ -691,6 +695,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         cache_shard: &CacheShard,
         hashes: HashPolicy<'_>,
     ) -> Result<ArchiveMetadata, Error> {
+        let _lock = lock_shard(cache_shard).await?;
+
         // Fetch the revision for the source distribution.
         let revision = self
             .archive_revision(source, resource, cache_shard, hashes)
@@ -728,11 +734,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .await?
         {
             // Store the metadata.
-            let cache_entry = cache_shard.entry(METADATA);
-            fs::create_dir_all(cache_entry.dir())
+            fs::create_dir_all(metadata_entry.dir())
                 .await
                 .map_err(Error::CacheWrite)?;
-            write_atomic(cache_entry.path(), rmp_serde::to_vec(&metadata)?)
+            write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
                 .await
                 .map_err(Error::CacheWrite)?;
@@ -759,7 +764,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         }

         // Store the metadata.
-        let metadata_entry = cache_shard.entry(METADATA);
         write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
             .await
             .map_err(Error::CacheWrite)?;
@@ -838,6 +842,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             WheelCache::Path(resource.url).root(),
         );

+        let _lock = lock_shard(&cache_shard).await?;
+
         // Fetch the revision for the source distribution.
         let revision = self
             .source_tree_revision(source, resource, &cache_shard)
@@ -902,6 +908,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             WheelCache::Path(resource.url).root(),
         );

+        let _lock = lock_shard(&cache_shard).await?;
+
         // Fetch the revision for the source distribution.
         let revision = self
             .source_tree_revision(source, resource, &cache_shard)
@@ -925,11 +933,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .await?
         {
             // Store the metadata.
-            let cache_entry = cache_shard.entry(METADATA);
-            fs::create_dir_all(cache_entry.dir())
+            fs::create_dir_all(metadata_entry.dir())
                 .await
                 .map_err(Error::CacheWrite)?;
-            write_atomic(cache_entry.path(), rmp_serde::to_vec(&metadata)?)
+            write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
                 .await
                 .map_err(Error::CacheWrite)?;
@@ -953,7 +960,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         }

         // Store the metadata.
-        let metadata_entry = cache_shard.entry(METADATA);
         write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
             .await
             .map_err(Error::CacheWrite)?;
@@ -1039,6 +1045,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             WheelCache::Git(&url, &git_sha.to_short_string()).root(),
         );

+        let _lock = lock_shard(&cache_shard).await?;
+
         // If the cache contains a compatible wheel, return it.
         if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
             return Ok(built_wheel);
@@ -1060,8 +1068,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         }

         // Store the metadata.
-        let cache_entry = cache_shard.entry(METADATA);
-        write_atomic(cache_entry.path(), rmp_serde::to_vec(&metadata)?)
+        let metadata_entry = cache_shard.entry(METADATA);
+        write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
             .await
             .map_err(Error::CacheWrite)?;
@@ -1111,6 +1119,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             WheelCache::Git(&url, &git_sha.to_short_string()).root(),
         );

+        let _lock = lock_shard(&cache_shard).await?;
+
         // If the cache contains compatible metadata, return it.
         let metadata_entry = cache_shard.entry(METADATA);
         if self
@@ -1132,11 +1142,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .await?
         {
             // Store the metadata.
-            let cache_entry = cache_shard.entry(METADATA);
-            fs::create_dir_all(cache_entry.dir())
+            fs::create_dir_all(metadata_entry.dir())
                 .await
                 .map_err(Error::CacheWrite)?;
-            write_atomic(cache_entry.path(), rmp_serde::to_vec(&metadata)?)
+            write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
                 .await
                 .map_err(Error::CacheWrite)?;
@@ -1160,8 +1169,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         }

         // Store the metadata.
-        let cache_entry = cache_shard.entry(METADATA);
-        write_atomic(cache_entry.path(), rmp_serde::to_vec(&metadata)?)
+        write_atomic(metadata_entry.path(), rmp_serde::to_vec(&metadata)?)
             .await
             .map_err(Error::CacheWrite)?;
@@ -1625,3 +1633,19 @@ fn read_wheel_metadata(
     let dist_info = read_archive_metadata(filename, &mut archive)?;
     Ok(Metadata23::parse_metadata(&dist_info)?)
 }
+
+/// Apply an advisory lock to a [`CacheShard`] to prevent concurrent builds.
+async fn lock_shard(cache_shard: &CacheShard) -> Result<LockedFile, Error> {
+    let root = cache_shard.as_ref();
+
+    fs_err::create_dir_all(root).map_err(Error::CacheWrite)?;
+
+    let lock: LockedFile = tokio::task::spawn_blocking({
+        let root = root.to_path_buf();
+        move || LockedFile::acquire(root.join(".lock"), root.display())
+    })
+    .await?
+    .map_err(Error::CacheWrite)?;
+
+    Ok(lock)
+}

@@ -1273,7 +1273,7 @@ fn install_url_source_dist_cached() -> Result<()> {
     ----- stdout -----

     ----- stderr -----
-    Removed 126 files for tqdm ([SIZE])
+    Removed 127 files for tqdm ([SIZE])
     "###
     );
@@ -1370,7 +1370,7 @@ fn install_git_source_dist_cached() -> Result<()> {
     ----- stdout -----

     ----- stderr -----
-    Removed 3 files for werkzeug ([SIZE])
+    Removed 4 files for werkzeug ([SIZE])
     "###
     );
@@ -1471,7 +1471,7 @@ fn install_registry_source_dist_cached() -> Result<()> {
     ----- stdout -----

     ----- stderr -----
-    Removed 616 files for future ([SIZE])
+    Removed 617 files for future ([SIZE])
     "###
     );
@@ -1576,7 +1576,7 @@ fn install_path_source_dist_cached() -> Result<()> {
     ----- stdout -----

     ----- stderr -----
-    Removed 102 files for wheel ([SIZE])
+    Removed 103 files for wheel ([SIZE])
     "###
     );