Rename "built-wheels" cache bucket to "source-dists" (#5077)

This name should lead to less confusion. Unfortunately this is a
"breaking cache change" so everyone's cache will be invalidated. I'm not
sure if we should support a rename-on-upgrade.

Edit: We can make the breaking change the next time we bump the version of this cache bucket.
This commit is contained in:
Zanie Blue 2024-07-15 15:41:03 -04:00 committed by GitHub
parent 492e778fe7
commit 1b1eba12c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 84 additions and 71 deletions

View file

@ -295,9 +295,9 @@ impl Cache {
// Add an empty .gitignore to the build bucket, to ensure that the cache's own .gitignore // Add an empty .gitignore to the build bucket, to ensure that the cache's own .gitignore
// doesn't interfere with source distribution builds. Build backends (like hatchling) will // doesn't interfere with source distribution builds. Build backends (like hatchling) will
// traverse upwards to look for .gitignore files. // traverse upwards to look for .gitignore files.
fs::create_dir_all(root.join(CacheBucket::BuiltWheels.to_str()))?; fs::create_dir_all(root.join(CacheBucket::SourceDistributions.to_str()))?;
match fs::OpenOptions::new().write(true).create_new(true).open( match fs::OpenOptions::new().write(true).create_new(true).open(
root.join(CacheBucket::BuiltWheels.to_str()) root.join(CacheBucket::SourceDistributions.to_str())
.join(".gitignore"), .join(".gitignore"),
) { ) {
Ok(_) => {} Ok(_) => {}
@ -312,10 +312,10 @@ impl Cache {
// We have to put this below the gitignore. Otherwise, if the build backend uses the rust // We have to put this below the gitignore. Otherwise, if the build backend uses the rust
// ignore crate it will walk up to the top level .gitignore and ignore its python source // ignore crate it will walk up to the top level .gitignore and ignore its python source
// files. // files.
fs::OpenOptions::new() fs::OpenOptions::new().create(true).write(true).open(
.create(true) root.join(CacheBucket::SourceDistributions.to_str())
.write(true) .join(".git"),
.open(root.join(CacheBucket::BuiltWheels.to_str()).join(".git"))?; )?;
Ok(Self { Ok(Self {
root: fs::canonicalize(root)?, root: fs::canonicalize(root)?,
@ -525,8 +525,8 @@ pub enum CacheBucket {
/// └── flask-3.0.0.dist-info /// └── flask-3.0.0.dist-info
/// └── ... /// └── ...
Wheels, Wheels,
/// Wheels built from source distributions, their extracted metadata and the cache policy of /// Source distributions, wheels built from source distributions, their extracted metadata, and the
/// the source distribution. /// cache policy of the source distribution.
/// ///
/// The structure is similar to that of the `Wheel` bucket, except we have an additional layer /// The structure is similar to that of the `Wheel` bucket, except we have an additional layer
/// for the source distribution filename and the metadata is at the source distribution-level, /// for the source distribution filename and the metadata is at the source distribution-level,
@ -535,6 +535,9 @@ pub enum CacheBucket {
/// TODO(konstin): The cache policy should be on the source distribution level, the metadata we /// TODO(konstin): The cache policy should be on the source distribution level, the metadata we
/// can put next to the wheels as in the `Wheels` bucket. /// can put next to the wheels as in the `Wheels` bucket.
/// ///
/// The unzipped source distribution is stored in a directory matching the source distribution
/// archive name.
///
/// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped /// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped
/// lazily before installing. So when resolving, we only build the wheel and store the archive /// lazily before installing. So when resolving, we only build the wheel and store the archive
/// file in the cache, when installing, we unpack it under the same name (exclusive of the /// file in the cache, when installing, we unpack it under the same name (exclusive of the
@ -566,32 +569,35 @@ pub enum CacheBucket {
/// ///
/// ...may be cached as: /// ...may be cached as:
/// ```text /// ```text
/// built-wheels-v0/ /// built-wheels-v3/
/// ├── git /// ├── git
/// │ └── a67db8ed076e3814 /// │   └── 2122faf3e081fb7a
/// │ └── 843b753e9e8cb74e83cac55598719b39a4d5ef1f /// │      └── 7a2d650a4a7b4d04
/// │ ├── manifest.msgpack /// │      ├── metadata.msgpack
/// │ ├── metadata.msgpack /// │       └── pydantic_extra_types-2.9.0-py3-none-any.whl
/// │ └── pydantic_extra_types-2.1.0-py3-none-any.whl
/// ├── pypi /// ├── pypi
/// │ └── django /// │ └── django-allauth
/// │ └── django-allauth-0.51.0.tar.gz /// │ └── 0.51.0
/// │ ├── django_allauth-0.51.0-py3-none-any.whl /// │ ├── 0gH-_fwv8tdJ7JwwjJsUc
/// │ ├── manifest.msgpack /// │ │   ├── django-allauth-0.51.0.tar.gz
/// │ └── metadata.msgpack /// │ │ │ └── [UNZIPPED CONTENTS]
/// │ │   ├── django_allauth-0.51.0-py3-none-any.whl
/// │ │   └── metadata.msgpack
/// │ └── revision.http
/// └── url /// └── url
/// └── 6781bd6440ae72c2 /// └── 6781bd6440ae72c2
/// └── werkzeug /// ├── APYY01rbIfpAo_ij9sCY6
/// └── werkzeug-3.0.1.tar.gz /// │   ├── metadata.msgpack
/// ├── manifest.msgpack /// │   ├── werkzeug-3.0.1-py3-none-any.whl
/// ├── metadata.msgpack /// │   └── werkzeug-3.0.1.tar.gz
/// └── werkzeug-3.0.1-py3-none-any.whl /// │ └── [UNZIPPED CONTENTS]
/// └── revision.http
/// ``` /// ```
/// ///
/// Structurally, the `manifest.msgpack` is empty, and only contains the caching information /// Structurally, the `manifest.msgpack` is empty, and only contains the caching information
/// needed to invalidate the cache. The `metadata.msgpack` contains the metadata of the source /// needed to invalidate the cache. The `metadata.msgpack` contains the metadata of the source
/// distribution. /// distribution.
BuiltWheels, SourceDistributions,
/// Flat index responses, a format very similar to the simple metadata API. /// Flat index responses, a format very similar to the simple metadata API.
/// ///
/// Cache structure: /// Cache structure:
@ -663,7 +669,8 @@ pub enum CacheBucket {
impl CacheBucket { impl CacheBucket {
fn to_str(self) -> &'static str { fn to_str(self) -> &'static str {
match self { match self {
Self::BuiltWheels => "built-wheels-v3", // Note, next time we change the version we should change the name of this bucket to `source-dists-v0`
Self::SourceDistributions => "built-wheels-v3",
Self::FlatIndex => "flat-index-v0", Self::FlatIndex => "flat-index-v0",
Self::Git => "git-v0", Self::Git => "git-v0",
Self::Interpreter => "interpreter-v2", Self::Interpreter => "interpreter-v2",
@ -711,7 +718,7 @@ impl CacheBucket {
summary += rm_rf(directory.join(name.to_string()))?; summary += rm_rf(directory.join(name.to_string()))?;
} }
} }
Self::BuiltWheels => { Self::SourceDistributions => {
// For `pypi` wheels, we expect a directory per package (indexed by name). // For `pypi` wheels, we expect a directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Pypi); let root = cache.bucket(self).join(WheelCacheKind::Pypi);
summary += rm_rf(root.join(name.to_string()))?; summary += rm_rf(root.join(name.to_string()))?;
@ -796,7 +803,7 @@ impl CacheBucket {
pub fn iter() -> impl Iterator<Item = Self> { pub fn iter() -> impl Iterator<Item = Self> {
[ [
Self::Wheels, Self::Wheels,
Self::BuiltWheels, Self::SourceDistributions,
Self::FlatIndex, Self::FlatIndex,
Self::Git, Self::Git,
Self::Interpreter, Self::Interpreter,

View file

@ -35,7 +35,7 @@ impl<'a> BuiltWheelIndex<'a> {
pub fn url(&self, source_dist: &DirectUrlSourceDist) -> Result<Option<CachedWheel>, Error> { pub fn url(&self, source_dist: &DirectUrlSourceDist) -> Result<Option<CachedWheel>, Error> {
// For direct URLs, cache directly under the hash of the URL itself. // For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self.cache.shard( let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Url(source_dist.url.raw()).root(), WheelCache::Url(source_dist.url.raw()).root(),
); );
@ -57,7 +57,7 @@ impl<'a> BuiltWheelIndex<'a> {
/// Return the most compatible [`CachedWheel`] for a given source distribution at a local path. /// Return the most compatible [`CachedWheel`] for a given source distribution at a local path.
pub fn path(&self, source_dist: &PathSourceDist) -> Result<Option<CachedWheel>, Error> { pub fn path(&self, source_dist: &PathSourceDist) -> Result<Option<CachedWheel>, Error> {
let cache_shard = self.cache.shard( let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Path(&source_dist.url).root(), WheelCache::Path(&source_dist.url).root(),
); );
@ -92,7 +92,7 @@ impl<'a> BuiltWheelIndex<'a> {
source_dist: &DirectorySourceDist, source_dist: &DirectorySourceDist,
) -> Result<Option<CachedWheel>, Error> { ) -> Result<Option<CachedWheel>, Error> {
let cache_shard = self.cache.shard( let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
if source_dist.editable { if source_dist.editable {
WheelCache::Editable(&source_dist.url).root() WheelCache::Editable(&source_dist.url).root()
} else { } else {
@ -139,7 +139,7 @@ impl<'a> BuiltWheelIndex<'a> {
let git_sha = source_dist.git.precise()?; let git_sha = source_dist.git.precise()?;
let cache_shard = self.cache.shard( let cache_shard = self.cache.shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Git(&source_dist.url, &git_sha.to_short_string()).root(), WheelCache::Git(&source_dist.url, &git_sha.to_short_string()).root(),
); );

View file

@ -142,7 +142,7 @@ impl<'a> RegistryWheelIndex<'a> {
// Index all the built wheels, created by downloading and building source distributions // Index all the built wheels, created by downloading and building source distributions
// from the registry. // from the registry.
let cache_shard = cache.shard( let cache_shard = cache.shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Index(index_url).wheel_dir(package.to_string()), WheelCache::Index(index_url).wheel_dir(package.to_string()),
); );

View file

@ -92,7 +92,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
// For registry source distributions, shard by package, then version, for // For registry source distributions, shard by package, then version, for
// convenience in debugging. // convenience in debugging.
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Index(&dist.index) WheelCache::Index(&dist.index)
.wheel_dir(dist.name.as_ref()) .wheel_dir(dist.name.as_ref())
.join(dist.version.to_string()), .join(dist.version.to_string()),
@ -161,10 +161,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(dist.url.to_url()); ParsedArchiveUrl::from(dist.url.to_url());
// For direct URLs, cache directly under the hash of the URL itself. // For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self let cache_shard = self.build_context.cache().shard(
.build_context CacheBucket::SourceDistributions,
.cache() WheelCache::Url(&url).root(),
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root()); );
self.url( self.url(
source, source,
@ -190,10 +190,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await? .await?
} }
BuildableSource::Dist(SourceDist::Path(dist)) => { BuildableSource::Dist(SourceDist::Path(dist)) => {
let cache_shard = self let cache_shard = self.build_context.cache().shard(
.build_context CacheBucket::SourceDistributions,
.cache() WheelCache::Path(&dist.url).root(),
.shard(CacheBucket::BuiltWheels, WheelCache::Path(&dist.url).root()); );
self.archive( self.archive(
source, source,
&PathSourceUrl::from(dist), &PathSourceUrl::from(dist),
@ -213,10 +213,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(resource.url.clone()); ParsedArchiveUrl::from(resource.url.clone());
// For direct URLs, cache directly under the hash of the URL itself. // For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self let cache_shard = self.build_context.cache().shard(
.build_context CacheBucket::SourceDistributions,
.cache() WheelCache::Url(&url).root(),
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root()); );
self.url( self.url(
source, source,
@ -243,7 +243,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
} }
BuildableSource::Url(SourceUrl::Path(resource)) => { BuildableSource::Url(SourceUrl::Path(resource)) => {
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Path(resource.url).root(), WheelCache::Path(resource.url).root(),
); );
self.archive(source, resource, &cache_shard, tags, hashes) self.archive(source, resource, &cache_shard, tags, hashes)
@ -268,7 +268,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
BuildableSource::Dist(SourceDist::Registry(dist)) => { BuildableSource::Dist(SourceDist::Registry(dist)) => {
// For registry source distributions, shard by package, then version. // For registry source distributions, shard by package, then version.
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Index(&dist.index) WheelCache::Index(&dist.index)
.wheel_dir(dist.name.as_ref()) .wheel_dir(dist.name.as_ref())
.join(dist.version.to_string()), .join(dist.version.to_string()),
@ -334,10 +334,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(dist.url.to_url()); ParsedArchiveUrl::from(dist.url.to_url());
// For direct URLs, cache directly under the hash of the URL itself. // For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self let cache_shard = self.build_context.cache().shard(
.build_context CacheBucket::SourceDistributions,
.cache() WheelCache::Url(&url).root(),
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root()); );
self.url_metadata( self.url_metadata(
source, source,
@ -362,10 +362,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await? .await?
} }
BuildableSource::Dist(SourceDist::Path(dist)) => { BuildableSource::Dist(SourceDist::Path(dist)) => {
let cache_shard = self let cache_shard = self.build_context.cache().shard(
.build_context CacheBucket::SourceDistributions,
.cache() WheelCache::Path(&dist.url).root(),
.shard(CacheBucket::BuiltWheels, WheelCache::Path(&dist.url).root()); );
self.archive_metadata(source, &PathSourceUrl::from(dist), &cache_shard, hashes) self.archive_metadata(source, &PathSourceUrl::from(dist), &cache_shard, hashes)
.boxed_local() .boxed_local()
.await? .await?
@ -379,10 +379,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
ParsedArchiveUrl::from(resource.url.clone()); ParsedArchiveUrl::from(resource.url.clone());
// For direct URLs, cache directly under the hash of the URL itself. // For direct URLs, cache directly under the hash of the URL itself.
let cache_shard = self let cache_shard = self.build_context.cache().shard(
.build_context CacheBucket::SourceDistributions,
.cache() WheelCache::Url(&url).root(),
.shard(CacheBucket::BuiltWheels, WheelCache::Url(&url).root()); );
self.url_metadata( self.url_metadata(
source, source,
@ -408,7 +408,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
} }
BuildableSource::Url(SourceUrl::Path(resource)) => { BuildableSource::Url(SourceUrl::Path(resource)) => {
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Path(resource.url).root(), WheelCache::Path(resource.url).root(),
); );
self.archive_metadata(source, resource, &cache_shard, hashes) self.archive_metadata(source, resource, &cache_shard, hashes)
@ -887,7 +887,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
} }
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
if resource.editable { if resource.editable {
WheelCache::Editable(resource.url).root() WheelCache::Editable(resource.url).root()
} else { } else {
@ -956,7 +956,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
} }
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
if resource.editable { if resource.editable {
WheelCache::Editable(resource.url).root() WheelCache::Editable(resource.url).root()
} else { } else {
@ -1121,7 +1121,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
let git_sha = fetch.git().precise().expect("Exact commit after checkout"); let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(), WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
); );
@ -1208,7 +1208,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
let git_sha = fetch.git().precise().expect("Exact commit after checkout"); let git_sha = fetch.git().precise().expect("Exact commit after checkout");
let cache_shard = self.build_context.cache().shard( let cache_shard = self.build_context.cache().shard(
CacheBucket::BuiltWheels, CacheBucket::SourceDistributions,
WheelCache::Git(resource.url, &git_sha.to_short_string()).root(), WheelCache::Git(resource.url, &git_sha.to_short_string()).root(),
); );
@ -1284,8 +1284,11 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
target: &Path, target: &Path,
hashes: HashPolicy<'_>, hashes: HashPolicy<'_>,
) -> Result<Vec<HashDigest>, Error> { ) -> Result<Vec<HashDigest>, Error> {
let temp_dir = let temp_dir = tempfile::tempdir_in(
tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels)) self.build_context
.cache()
.bucket(CacheBucket::SourceDistributions),
)
.map_err(Error::CacheWrite)?; .map_err(Error::CacheWrite)?;
let reader = response let reader = response
.bytes_stream() .bytes_stream()
@ -1336,8 +1339,11 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
) -> Result<Vec<HashDigest>, Error> { ) -> Result<Vec<HashDigest>, Error> {
debug!("Unpacking for build: {}", path.display()); debug!("Unpacking for build: {}", path.display());
let temp_dir = let temp_dir = tempfile::tempdir_in(
tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels)) self.build_context
.cache()
.bucket(CacheBucket::SourceDistributions),
)
.map_err(Error::CacheWrite)?; .map_err(Error::CacheWrite)?;
let reader = fs_err::tokio::File::open(&path) let reader = fs_err::tokio::File::open(&path)
.await .await