Rename "built-wheels" cache bucket to "source-dists" (#5077)

This name should lead to less confusion. Unfortunately this is a
"breaking cache change" so everyone's cache will be invalidated. I'm not
sure if we should support a rename-on-upgrade.

Edit: we can make the breaking change the next time we bump the cache bucket version.
This commit is contained in:
Zanie Blue 2024-07-15 15:41:03 -04:00 committed by GitHub
parent 492e778fe7
commit 1b1eba12c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 84 additions and 71 deletions

View file

@ -295,9 +295,9 @@ impl Cache {
// Add an empty .gitignore to the build bucket, to ensure that the cache's own .gitignore
// doesn't interfere with source distribution builds. Build backends (like hatchling) will
// traverse upwards to look for .gitignore files.
fs::create_dir_all(root.join(CacheBucket::BuiltWheels.to_str()))?;
fs::create_dir_all(root.join(CacheBucket::SourceDistributions.to_str()))?;
match fs::OpenOptions::new().write(true).create_new(true).open(
root.join(CacheBucket::BuiltWheels.to_str())
root.join(CacheBucket::SourceDistributions.to_str())
.join(".gitignore"),
) {
Ok(_) => {}
@ -312,10 +312,10 @@ impl Cache {
// We have to put this below the gitignore. Otherwise, if the build backend uses the rust
// ignore crate it will walk up to the top level .gitignore and ignore its python source
// files.
fs::OpenOptions::new()
.create(true)
.write(true)
.open(root.join(CacheBucket::BuiltWheels.to_str()).join(".git"))?;
fs::OpenOptions::new().create(true).write(true).open(
root.join(CacheBucket::SourceDistributions.to_str())
.join(".git"),
)?;
Ok(Self {
root: fs::canonicalize(root)?,
@ -525,8 +525,8 @@ pub enum CacheBucket {
/// └── flask-3.0.0.dist-info
/// └── ...
Wheels,
/// Wheels built from source distributions, their extracted metadata and the cache policy of
/// the source distribution.
/// Source distributions, wheels built from source distributions, their extracted metadata, and the
/// cache policy of the source distribution.
///
/// The structure is similar to that of the `Wheels` bucket, except we have an additional layer
/// for the source distribution filename and the metadata is at the source distribution-level,
@ -535,6 +535,9 @@ pub enum CacheBucket {
/// TODO(konstin): The cache policy should be on the source distribution level, the metadata we
/// can put next to the wheels as in the `Wheels` bucket.
///
/// The unzipped source distribution is stored in a directory matching the source distribution
/// archive name.
///
/// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped
/// lazily before installing. So when resolving, we only build the wheel and store the archive
/// file in the cache; when installing, we unpack it under the same name (exclusive of the
@ -566,32 +569,35 @@ pub enum CacheBucket {
///
/// ...may be cached as:
/// ```text
/// built-wheels-v0/
/// built-wheels-v3/
/// ├── git
/// │ └── a67db8ed076e3814
/// │ └── 843b753e9e8cb74e83cac55598719b39a4d5ef1f
/// │ ├── manifest.msgpack
/// │ ├── metadata.msgpack
/// │ └── pydantic_extra_types-2.1.0-py3-none-any.whl
/// │   └── 2122faf3e081fb7a
/// │      └── 7a2d650a4a7b4d04
/// │      ├── metadata.msgpack
/// │       └── pydantic_extra_types-2.9.0-py3-none-any.whl
/// ├── pypi
/// │ └── django
/// │ └── django-allauth-0.51.0.tar.gz
/// │ ├── django_allauth-0.51.0-py3-none-any.whl
/// │ ├── manifest.msgpack
/// │ └── metadata.msgpack
/// │ └── django-allauth
/// │ └── 0.51.0
/// │ ├── 0gH-_fwv8tdJ7JwwjJsUc
/// │ │   ├── django-allauth-0.51.0.tar.gz
/// │ │ │ └── [UNZIPPED CONTENTS]
/// │ │   ├── django_allauth-0.51.0-py3-none-any.whl
/// │ │   └── metadata.msgpack
/// │ └── revision.http
/// └── url
/// └── 6781bd6440ae72c2
/// └── werkzeug
/// └── werkzeug-3.0.1.tar.gz
/// ├── manifest.msgpack
/// ├── metadata.msgpack
/// └── werkzeug-3.0.1-py3-none-any.whl
/// ├── APYY01rbIfpAo_ij9sCY6
/// │   ├── metadata.msgpack
/// │   ├── werkzeug-3.0.1-py3-none-any.whl
/// │   └── werkzeug-3.0.1.tar.gz
/// │ └── [UNZIPPED CONTENTS]
/// └── revision.http
/// ```
///
/// Structurally, the `manifest.msgpack` is empty, and only contains the caching information
/// needed to invalidate the cache. The `metadata.msgpack` contains the metadata of the source
/// distribution.
BuiltWheels,
SourceDistributions,
/// Flat index responses, a format very similar to the simple metadata API.
///
/// Cache structure:
@ -663,7 +669,8 @@ pub enum CacheBucket {
impl CacheBucket {
fn to_str(self) -> &'static str {
match self {
Self::BuiltWheels => "built-wheels-v3",
// Note: the next time we bump this bucket's version, we should also rename it to `source-dists-v0`.
Self::SourceDistributions => "built-wheels-v3",
Self::FlatIndex => "flat-index-v0",
Self::Git => "git-v0",
Self::Interpreter => "interpreter-v2",
@ -711,7 +718,7 @@ impl CacheBucket {
summary += rm_rf(directory.join(name.to_string()))?;
}
}
Self::BuiltWheels => {
Self::SourceDistributions => {
// For `pypi` wheels, we expect a directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Pypi);
summary += rm_rf(root.join(name.to_string()))?;
@ -796,7 +803,7 @@ impl CacheBucket {
pub fn iter() -> impl Iterator<Item = Self> {
[
Self::Wheels,
Self::BuiltWheels,
Self::SourceDistributions,
Self::FlatIndex,
Self::Git,
Self::Interpreter,

View file

@ -35,7 +35,7 @@ impl<'a> BuiltWheelIndex<'a> {
pub fn url(&self, source_dist: &DirectUrlSourceDist) -> Result<Option<CachedWheel>, Error> {
// For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Url(source_dist.url.raw()).root(),
);
@ -57,7 +57,7 @@ impl<'a> BuiltWheelIndex<'a> {
/// Return the most compatible [`CachedWheel`] for a given source distribution at a local path.
pub fn path(&self, source_dist: &PathSourceDist) -> Result<Option<CachedWheel>, Error> {
let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Path(&source_dist.url).root(),
);
@ -92,7 +92,7 @@ impl<'a> BuiltWheelIndex<'a> {
source_dist: &DirectorySourceDist,
) -> Result<Option<CachedWheel>, Error> {
let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
if source_dist.editable {
WheelCache::Editable(&source_dist.url).root()
} else {
@ -139,7 +139,7 @@ impl<'a> BuiltWheelIndex<'a> {
let git_sha = source_dist.git.precise()?;
let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Git(&source_dist.url, &git_sha.to_short_string()).root(),
);

View file

@ -142,7 +142,7 @@ impl<'a> RegistryWheelIndex<'a> {
// Index all the built wheels, created by downloading and building source distributions
// from the registry.
let cache_shard = cache.shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Index(index_url).wheel_dir(package.to_string()),
);

View file

@ -92,7 +92,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// For registry source distributions, shard by package, then version, for
// convenience in debugging.
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Index(&dist.index)
.wheel_dir(dist.name.as_ref())
.join(dist.version.to_string()),
@ -161,10 +161,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(dist.url.to_url());
// For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self
.build_context
.cache()
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root());
let cache_shard = self.build_context.cache().shard(
CacheBucket::SourceDistributions,
WheelCache::Url(&url).root(),
);
self.url(
source,
@ -190,10 +190,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await?
}
BuildableSource::Dist(SourceDist::Path(dist)) => {
let cache_shard = self
.build_context
.cache()
.shard(CacheBucket::BuiltWheels, WheelCache::Path(&dist.url).root());
let cache_shard = self.build_context.cache().shard(
CacheBucket::SourceDistributions,
WheelCache::Path(&dist.url).root(),
);
self.archive(
source,
&PathSourceUrl::from(dist),
@ -213,10 +213,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(resource.url.clone());
// For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self
.build_context
.cache()
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root());
let cache_shard = self.build_context.cache().shard(
CacheBucket::SourceDistributions,
WheelCache::Url(&url).root(),
);
self.url(
source,
@ -243,7 +243,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
BuildableSource::Url(SourceUrl::Path(resource)) => {
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Path(resource.url).root(),
);
self.archive(source, resource, &cache_shard, tags, hashes)
@ -268,7 +268,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
BuildableSource::Dist(SourceDist::Registry(dist)) => {
// For registry source distributions, shard by package, then version.
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Index(&dist.index)
.wheel_dir(dist.name.as_ref())
.join(dist.version.to_string()),
@ -334,10 +334,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(dist.url.to_url());
// For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self
.build_context
.cache()
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root());
let cache_shard = self.build_context.cache().shard(
CacheBucket::SourceDistributions,
WheelCache::Url(&url).root(),
);
self.url_metadata(
source,
@ -362,10 +362,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await?
}
BuildableSource::Dist(SourceDist::Path(dist)) => {
let cache_shard = self
.build_context
.cache()
.shard(CacheBucket::BuiltWheels, WheelCache::Path(&dist.url).root());
let cache_shard = self.build_context.cache().shard(
CacheBucket::SourceDistributions,
WheelCache::Path(&dist.url).root(),
);
self.archive_metadata(source, &PathSourceUrl::from(dist), &cache_shard, hashes)
.boxed_local()
.await?
@ -379,10 +379,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(resource.url.clone());
// For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self
.build_context
.cache()
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root());
let cache_shard = self.build_context.cache().shard(
CacheBucket::SourceDistributions,
WheelCache::Url(&url).root(),
);
self.url_metadata(
source,
@ -408,7 +408,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
BuildableSource::Url(SourceUrl::Path(resource)) => {
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Path(resource.url).root(),
);
self.archive_metadata(source, resource, &cache_shard, hashes)
@ -887,7 +887,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
if resource.editable {
WheelCache::Editable(resource.url).root()
} else {
@ -956,7 +956,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
if resource.editable {
WheelCache::Editable(resource.url).root()
} else {
@ -1121,7 +1121,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
);
@ -1208,7 +1208,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels,
CacheBucket::SourceDistributions,
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
);
@ -1284,9 +1284,12 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
target: &Path,
hashes: HashPolicy<'_>,
) -> Result<Vec<HashDigest>, Error> {
let temp_dir =
tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels))
.map_err(Error::CacheWrite)?;
let temp_dir = tempfile::tempdir_in(
self.build_context
.cache()
.bucket(CacheBucket::SourceDistributions),
)
.map_err(Error::CacheWrite)?;
let reader = response
.bytes_stream()
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
@ -1336,9 +1339,12 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
) -> Result<Vec<HashDigest>, Error> {
debug!("Unpacking for build: {}", path.display());
let temp_dir =
tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels))
.map_err(Error::CacheWrite)?;
let temp_dir = tempfile::tempdir_in(
self.build_context
.cache()
.bucket(CacheBucket::SourceDistributions),
)
.map_err(Error::CacheWrite)?;
let reader = fs_err::tokio::File::open(&path)
.await
.map_err(Error::CacheRead)?;