Modify install plan to support all distribution types (#581)

This PR adds caching support for built wheels in the installer.
Specifically, the `RegistryWheelIndex` now indexes both downloaded and
built wheels (from registries), and we have a new `BuiltWheelIndex` that
takes a subdirectory and returns the "best-matching" compatible wheel.

Closes #570.
This commit is contained in:
Charlie Marsh 2023-12-06 23:43:34 -05:00 committed by GitHub
parent edaeb9b0e8
commit aa065f5c97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 444 additions and 147 deletions

View file

@ -16,6 +16,7 @@ workspace = true
distribution-filename = { path = "../distribution-filename", features = ["serde"] }
distribution-types = { path = "../distribution-types" }
install-wheel-rs = { path = "../install-wheel-rs" }
pep440_rs = { path = "../pep440-rs" }
platform-tags = { path = "../platform-tags" }
puffin-cache = { path = "../puffin-cache" }
puffin-client = { path = "../puffin-client" }

View file

@ -0,0 +1,91 @@
use std::path::PathBuf;
use fs_err as fs;
use tracing::warn;
use distribution_types::CachedWheel;
use platform_tags::Tags;
use crate::index::iter_directories;
/// A local index of built distributions for a specific source distribution.
///
/// The index is a thin view over a cache directory: nothing is read from disk until
/// `find` is called.
#[derive(Debug)]
pub struct BuiltWheelIndex<'a> {
/// The directory whose subdirectories are scanned for cached wheels.
directory: PathBuf,
/// The tags against which each cached wheel's filename is checked for compatibility.
tags: &'a Tags,
}
impl<'a> BuiltWheelIndex<'a> {
    /// Create a new index of built distributions.
    ///
    /// The `directory` should be the directory containing the built distributions for a specific
    /// source distribution. For example, given the built wheel cache structure:
    /// ```text
    /// built-wheels-v0/
    /// └── pypi
    ///     └── django-allauth-0.51.0.tar.gz
    ///         ├── django_allauth-0.51.0-py3-none-any.whl
    ///         └── metadata.json
    /// ```
    ///
    /// The `directory` should be `built-wheels-v0/pypi/django-allauth-0.51.0.tar.gz`.
    ///
    /// Construction performs no I/O; the directory is only read by [`Self::find`].
    pub fn new(directory: impl Into<PathBuf>, tags: &'a Tags) -> Self {
        Self {
            directory: directory.into(),
            tags,
        }
    }

    /// Find the "best" distribution in the index.
    ///
    /// Only wheels that are compatible with the index's tags are considered. Among those, the
    /// wheel with the highest version wins; compatibility is used purely as a tie-breaker
    /// between wheels of the same version. Ordering the comparison this way makes the result
    /// independent of the order in which the directory entries are visited.
    ///
    /// Returns `None` if the directory cannot be read, or if it contains no compatible wheel.
    ///
    /// As a side effect, any subdirectory that fails to parse as a cached wheel is treated as
    /// an invalid cache entry and removed (best effort; failures are only logged).
    pub fn find(&self) -> Option<CachedWheel> {
        let mut candidate: Option<CachedWheel> = None;

        for subdir in iter_directories(self.directory.read_dir().ok()?) {
            match CachedWheel::from_path(&subdir) {
                Ok(None) => {}
                Ok(Some(dist_info)) => {
                    // Pick the wheel with the highest priority
                    let compatibility = dist_info.filename.compatibility(self.tags);

                    // Only consider wheels that are compatible with our tags.
                    if compatibility.is_none() {
                        continue;
                    }

                    // TODO(charlie): Consider taking into account the freshness checks that we
                    // encode when building source distributions (e.g., timestamps). For now, we
                    // assume that distributions are immutable when installing (i.e., in this
                    // index).
                    let replace = match candidate.as_ref() {
                        // The first compatible wheel is always accepted.
                        None => true,
                        Some(existing) => {
                            let new_version = &dist_info.filename.version;
                            let old_version = &existing.filename.version;

                            // Prefer the newer version. Compatibility only breaks ties, so an
                            // older wheel can never displace a newer one.
                            new_version > old_version
                                || (new_version == old_version
                                    && compatibility
                                        > existing.filename.compatibility(self.tags))
                        }
                    };

                    if replace {
                        candidate = Some(dist_info);
                    }
                }
                Err(err) => {
                    warn!(
                        "Invalid cache entry at {}, removing. {err}",
                        subdir.display()
                    );
                    // Removal is best effort: a failure is logged, not propagated.
                    let result = fs::remove_dir_all(&subdir);
                    if let Err(err) = result {
                        warn!(
                            "Failed to remove invalid cache entry at {}: {err}",
                            subdir.display()
                        );
                    }
                }
            }
        }

        candidate
    }
}

View file

@ -0,0 +1,27 @@
use std::path::PathBuf;
use tracing::warn;
pub use built_wheel_index::BuiltWheelIndex;
pub use registry_wheel_index::RegistryWheelIndex;
mod built_wheel_index;
mod registry_wheel_index;
/// Lazily yield the path of every directory entry in `read_dir`.
///
/// Entries that cannot be read are logged and skipped. Entries whose file type cannot be
/// determined, or that are not directories, are silently skipped.
fn iter_directories(read_dir: std::fs::ReadDir) -> impl Iterator<Item = PathBuf> {
    read_dir.filter_map(|result| {
        let dir_entry = match result {
            Ok(dir_entry) => dir_entry,
            Err(err) => {
                warn!("Failed to read entry of cache: {}", err);
                return None;
            }
        };

        // A failed file-type lookup is treated the same as "not a directory".
        match dir_entry.file_type() {
            Ok(kind) if kind.is_dir() => Some(dir_entry.path()),
            _ => None,
        }
    })
}

View file

@ -0,0 +1,111 @@
use std::collections::BTreeMap;
use std::path::Path;
use fs_err as fs;
use fxhash::FxHashMap;
use tracing::warn;
use distribution_types::{CachedRegistryDist, CachedWheel, Metadata};
use pep440_rs::Version;
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket, WheelCache};
use puffin_normalize::PackageName;
use pypi_types::IndexUrls;
use crate::index::iter_directories;
/// A local index of distributions that originate from a registry, like `PyPI`.
///
/// Maps each package name to its cached distributions, keyed (and therefore ordered) by
/// version, with at most one distribution stored per version.
#[derive(Debug, Default)]
pub struct RegistryWheelIndex(FxHashMap<PackageName, BTreeMap<Version, CachedRegistryDist>>);
impl RegistryWheelIndex {
/// Build an index of cached distributions from a directory.
pub fn from_directory(cache: &Cache, tags: &Tags, index_urls: &IndexUrls) -> Self {
let mut index = Self::default();
for index_url in index_urls {
// Index all the wheels that were downloaded directly from the registry.
// TODO(charlie): Shard the cache by package name, and do this lazily.
let wheel_dir = cache
.bucket(CacheBucket::Wheels)
.join(WheelCache::Index(index_url).wheel_dir());
index.add_directory(wheel_dir, tags);
// Index all the built wheels, created by downloading and building source distributions
// from the registry.
// TODO(charlie): Shard the cache by package name, and do this lazily.
let built_wheel_dir = cache
.bucket(CacheBucket::BuiltWheels)
.join(WheelCache::Index(index_url).wheel_dir());
let Ok(read_dir) = built_wheel_dir.read_dir() else {
continue;
};
for subdir in iter_directories(read_dir) {
index.add_directory(subdir, tags);
}
}
index
}
/// Returns a distribution from the index, if it exists.
pub fn by_name(
&self,
name: &PackageName,
) -> impl Iterator<Item = (&Version, &CachedRegistryDist)> {
// Using static to extend the lifetime
static DEFAULT_MAP: BTreeMap<Version, CachedRegistryDist> = BTreeMap::new();
self.0.get(name).unwrap_or(&DEFAULT_MAP).iter().rev()
}
/// Add the wheels in a given directory to the index.
///
/// Each subdirectory in the given path is expected to be that of an unzipped wheel.
fn add_directory(&mut self, path: impl AsRef<Path>, tags: &Tags) {
let Ok(read_dir) = path.as_ref().read_dir() else {
return;
};
for wheel_dir in iter_directories(read_dir) {
match CachedWheel::from_path(&wheel_dir) {
Ok(None) => {}
Ok(Some(dist_info)) => {
let dist_info = dist_info.into_registry_dist();
// Pick the wheel with the highest priority
let compatibility = dist_info.filename.compatibility(tags);
if let Some(existing) = self
.0
.get_mut(dist_info.name())
.and_then(|package| package.get_mut(&dist_info.filename.version))
{
// Override if we have better compatibility
if compatibility > existing.filename.compatibility(tags) {
*existing = dist_info;
}
} else if compatibility.is_some() {
self.0
.entry(dist_info.name().clone())
.or_default()
.insert(dist_info.filename.version.clone(), dist_info);
}
}
Err(err) => {
warn!(
"Invalid cache entry at {}, removing. {err}",
wheel_dir.display()
);
let result = fs::remove_dir_all(&wheel_dir);
if let Err(err) = result {
warn!(
"Failed to remove invalid cache entry at {}: {err}",
wheel_dir.display()
);
}
}
}
}
}
}

View file

@ -1,5 +1,6 @@
pub use distribution_database::{DistributionDatabase, DistributionDatabaseError};
pub use download::{DiskWheel, Download, InMemoryWheel, LocalWheel, SourceDistDownload};
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
pub use reporter::Reporter;
pub use source_dist::{SourceDistCachedBuilder, SourceDistError};
pub use unzip::Unzip;
@ -7,6 +8,7 @@ pub use unzip::Unzip;
mod distribution_database;
mod download;
mod error;
mod index;
mod locks;
mod reporter;
mod source_dist;