Enable PEP 517 builds for unnamed requirements (#2600)

## Summary

This PR enables the source distribution database to be used with unnamed
requirements (i.e., URLs without a package name). The (significant)
upside here is that we can now use PEP 517 hooks to resolve unnamed
requirement metadata _and_ reuse any computation in the cache.

The changes to `crates/uv-distribution/src/source/mod.rs` are quite
extensive, but mostly mechanical. The core idea is that we introduce a
new `BuildableSource` abstraction, which can either be a distribution,
or an unnamed URL:

```rust
/// A reference to a source that can be built into a built distribution.
///
/// This can either be a distribution (e.g., a package on a registry) or a direct URL.
///
/// Distributions can _also_ point to URLs in lieu of a registry; however, the primary distinction
/// here is that a distribution will always include a package name, while a URL will not.
#[derive(Debug, Clone, Copy)]
pub enum BuildableSource<'a> {
    /// A source distribution, which always carries a package name.
    Dist(&'a SourceDist),
    /// A direct URL to a source, for which no package name is known up front.
    Url(SourceUrl<'a>),
}
```

All the methods on the source distribution database now accept
`BuildableSource`. `BuildableSource` has a `name()` method, but it
returns `Option<&PackageName>`, and everything is required to work with
and without a package name.

The main drawback of this approach (which isn't a terrible one) is that
we can no longer include the package name in the cache route. (We do continue
to use the package name for registry-based distributions, since those
always have a name.) The package name was included in the cache route
for two reasons: (1) it's nice for debugging; and (2) we use it to power
`uv cache clean flask`, to identify the entries that are relevant for
Flask.

To solve this, I changed the `uv cache clean` code to look one level
deeper. So, when we want to determine whether to remove the cache entry
for a given URL, we now look into the directory to see if there are any
wheels that match the package name. This isn't as nice, but it does work
(and we have test coverage for it -- all passing).

I also considered removing the package name from the cache routes for
non-registry _wheels_, for consistency, but it would require a cache
bump, and it didn't feel important enough to merit that.
This commit is contained in:
Charlie Marsh 2024-03-21 22:46:39 -04:00 committed by GitHub
parent 12192dd872
commit 5d7d7dce24
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 473 additions and 318 deletions

View file

@ -16,6 +16,7 @@ workspace = true
[dependencies]
cache-key = { workspace = true }
distribution-types = { workspace = true }
pypi-types = { workspace = true }
uv-fs = { workspace = true }
uv-normalize = { workspace = true }
@ -30,3 +31,4 @@ tempfile = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
walkdir = { workspace = true }
rmp-serde = { workspace = true }

View file

@ -12,6 +12,7 @@ use tempfile::{tempdir, TempDir};
use tracing::debug;
use distribution_types::InstalledDist;
use pypi_types::Metadata23;
use uv_fs::directories;
use uv_normalize::PackageName;
@ -590,7 +591,7 @@ pub enum CacheBucket {
impl CacheBucket {
fn to_str(self) -> &'static str {
match self {
Self::BuiltWheels => "built-wheels-v0",
Self::BuiltWheels => "built-wheels-v1",
Self::FlatIndex => "flat-index-v0",
Self::Git => "git-v0",
Self::Interpreter => "interpreter-v0",
@ -604,6 +605,17 @@ impl CacheBucket {
///
/// Returns the number of entries removed from the cache.
fn remove(self, cache: &Cache, name: &PackageName) -> Result<Removal, io::Error> {
/// Checks whether the cache directory at `path` holds a built wheel for `name`.
///
/// Reads `metadata.msgpack` from inside `path` and decodes it as [`Metadata23`]; the result is
/// `true` only when the decoded package name equals `name`. A file that cannot be read or
/// decoded is treated as "not a match" rather than as an error.
fn is_match(path: &Path, name: &PackageName) -> bool {
    fs_err::read(path.join("metadata.msgpack"))
        .ok()
        .and_then(|bytes| rmp_serde::from_slice::<Metadata23>(&bytes).ok())
        .map_or(false, |decoded| decoded.name == *name)
}
let mut summary = Removal::default();
match self {
Self::Wheels => {
@ -637,26 +649,35 @@ impl CacheBucket {
summary += rm_rf(directory.join(name.to_string()))?;
}
// For direct URLs, we expect a directory for every index, followed by a
// directory per package (indexed by name).
// For direct URLs, we expect a directory for every URL, followed by a
// directory per version. To determine whether the URL is relevant, we need to
// search for a wheel matching the package name.
let root = cache.bucket(self).join(WheelCacheKind::Url);
for directory in directories(root) {
summary += rm_rf(directory.join(name.to_string()))?;
for url in directories(root) {
if directories(&url).any(|version| is_match(&version, name)) {
summary += rm_rf(url)?;
}
}
// For local dependencies, we expect a directory for every path, followed by a
// directory per package (indexed by name).
// directory per version. To determine whether the path is relevant, we need to
// search for a wheel matching the package name.
let root = cache.bucket(self).join(WheelCacheKind::Path);
for directory in directories(root) {
summary += rm_rf(directory.join(name.to_string()))?;
for path in directories(root) {
if directories(&path).any(|version| is_match(&version, name)) {
summary += rm_rf(path)?;
}
}
// For Git dependencies, we expect a directory for every repository, followed by a
// directory for every SHA, followed by a directory per package (indexed by name).
// directory for every SHA. To determine whether the SHA is relevant, we need to
// search for a wheel matching the package name.
let root = cache.bucket(self).join(WheelCacheKind::Git);
for directory in directories(root) {
for directory in directories(directory) {
summary += rm_rf(directory.join(name.to_string()))?;
for repository in directories(root) {
for sha in directories(repository) {
if is_match(&sha, name) {
summary += rm_rf(sha)?;
}
}
}
}

View file

@ -5,13 +5,7 @@ use url::Url;
use cache_key::{digest, CanonicalUrl};
use distribution_types::IndexUrl;
#[allow(unused_imports)] // For rustdoc
use crate::CacheBucket;
/// Cache wheels and their metadata, both from remote wheels and built from source distributions.
///
/// Use [`WheelCache::remote_wheel_dir`] for remote wheel metadata caching and
/// [`WheelCache::built_wheel_dir`] for built source distributions metadata caching.
#[derive(Debug, Clone)]
pub enum WheelCache<'a> {
/// Either PyPI or an alternative index, which we key by index URL.
@ -28,7 +22,8 @@ pub enum WheelCache<'a> {
}
impl<'a> WheelCache<'a> {
fn bucket(&self) -> PathBuf {
/// The root directory for a cache bucket.
pub fn root(&self) -> PathBuf {
match self {
WheelCache::Index(IndexUrl::Pypi(_)) => WheelCacheKind::Pypi.root(),
WheelCache::Index(url) => WheelCacheKind::Index
@ -47,14 +42,9 @@ impl<'a> WheelCache<'a> {
}
}
/// Metadata of a remote wheel. See [`CacheBucket::Wheels`]
pub fn remote_wheel_dir(&self, package_name: impl AsRef<Path>) -> PathBuf {
self.bucket().join(package_name)
}
/// Metadata of a built source distribution. See [`CacheBucket::BuiltWheels`]
pub fn built_wheel_dir(&self, filename: impl AsRef<Path>) -> PathBuf {
self.bucket().join(filename)
/// A subdirectory in a bucket for wheels for a specific package.
pub fn wheel_dir(&self, package_name: impl AsRef<Path>) -> PathBuf {
self.root().join(package_name)
}
}