Avoid batch prefetching for un-optimized registries (#7226)

## Summary

We now track the discovered `IndexCapabilities` for each `IndexUrl`. If
we learn that an index doesn't support range requests, we avoid doing
any batch prefetching.

Closes https://github.com/astral-sh/uv/issues/7221.
This commit is contained in:
Charlie Marsh 2024-09-09 15:46:19 -04:00 committed by GitHub
parent 970bd1aa0c
commit 9a7262c360
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 202 additions and 95 deletions

1
Cargo.lock generated
View file

@ -1040,6 +1040,7 @@ dependencies = [
"platform-tags",
"pypi-types",
"rkyv",
"rustc-hash",
"schemars",
"serde",
"serde_json",

View file

@ -83,7 +83,7 @@ mod resolver {
use anyhow::Result;
use distribution_types::IndexLocations;
use distribution_types::{IndexCapabilities, IndexLocations};
use install_wheel_rs::linker::LinkMode;
use pep440_rs::Version;
use pep508_rs::{MarkerEnvironment, MarkerEnvironmentBuilder};
@ -152,6 +152,7 @@ mod resolver {
);
let flat_index = FlatIndex::default();
let git = GitResolver::default();
let capabilities = IndexCapabilities::default();
let hashes = HashStrategy::None;
let in_flight = InFlight::default();
let index = InMemoryIndex::default();
@ -179,6 +180,7 @@ mod resolver {
&flat_index,
&index,
&git,
&capabilities,
&in_flight,
IndexStrategy::default(),
&config_settings,

View file

@ -28,6 +28,7 @@ fs-err = { workspace = true }
itertools = { workspace = true }
jiff = { workspace = true }
rkyv = { workspace = true }
rustc-hash = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }

View file

@ -1,10 +1,11 @@
use itertools::Either;
use rustc_hash::FxHashSet;
use std::borrow::Cow;
use std::fmt::{Display, Formatter};
use std::ops::Deref;
use std::path::Path;
use std::str::FromStr;
use std::sync::LazyLock;
use std::sync::{Arc, LazyLock, RwLock};
use thiserror::Error;
use url::{ParseError, Url};
@ -485,3 +486,27 @@ impl From<IndexLocations> for IndexUrls {
}
}
}
/// A map of [`IndexUrl`]s to their capabilities.
///
/// For now, we only support a single capability (range requests), and we only store an index if
/// it _doesn't_ support range requests. The benefit is that the map is almost always empty, so
/// validating capabilities is extremely cheap.
#[derive(Debug, Default, Clone)]
pub struct IndexCapabilities(Arc<RwLock<FxHashSet<IndexUrl>>>);
impl IndexCapabilities {
/// Returns `true` if the given [`IndexUrl`] supports range requests.
pub fn supports_range_requests(&self, index_url: &IndexUrl) -> bool {
!self.0.read().unwrap().contains(index_url)
}
/// Mark an [`IndexUrl`] as not supporting range requests.
pub fn set_supports_range_requests(&self, index_url: IndexUrl, supports: bool) {
if supports {
self.0.write().unwrap().remove(&index_url);
} else {
self.0.write().unwrap().insert(index_url);
}
}
}

View file

@ -468,15 +468,14 @@ impl<'a> CompatibleDist<'a> {
}
}
/// Returns whether the distribution is a source distribution.
///
/// Avoid building source distributions we don't need.
pub fn prefetchable(&self) -> bool {
match *self {
CompatibleDist::SourceDist { .. } => false,
CompatibleDist::InstalledDist(_)
| CompatibleDist::CompatibleWheel { .. }
| CompatibleDist::IncompatibleWheel { .. } => true,
/// Returns a [`RegistryBuiltWheel`] if the distribution includes a compatible or incompatible
/// wheel.
pub fn wheel(&self) -> Option<&RegistryBuiltWheel> {
match self {
CompatibleDist::InstalledDist(_) => None,
CompatibleDist::SourceDist { .. } => None,
CompatibleDist::CompatibleWheel { wheel, .. } => Some(wheel),
CompatibleDist::IncompatibleWheel { wheel, .. } => Some(wheel),
}
}
}

View file

@ -1,6 +1,5 @@
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::io;
use std::path::PathBuf;
use std::str::FromStr;
@ -16,7 +15,9 @@ use tracing::{info_span, instrument, trace, warn, Instrument};
use url::Url;
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name};
use distribution_types::{
BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name,
};
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
@ -147,7 +148,7 @@ impl<'a> RegistryClientBuilder<'a> {
}
impl<'a> TryFrom<BaseClientBuilder<'a>> for RegistryClientBuilder<'a> {
type Error = io::Error;
type Error = std::io::Error;
fn try_from(value: BaseClientBuilder<'a>) -> Result<Self, Self::Error> {
Ok(Self {
@ -402,7 +403,11 @@ impl RegistryClient {
/// 2. From a remote wheel by partial zip reading
/// 3. From a (temp) download of a remote wheel (this is a fallback, the webserver should support range requests)
#[instrument(skip_all, fields(% built_dist))]
pub async fn wheel_metadata(&self, built_dist: &BuiltDist) -> Result<Metadata23, Error> {
pub async fn wheel_metadata(
&self,
built_dist: &BuiltDist,
capabilities: &IndexCapabilities,
) -> Result<Metadata23, Error> {
let metadata = match &built_dist {
BuiltDist::Registry(wheels) => {
#[derive(Debug, Clone)]
@ -451,7 +456,7 @@ impl RegistryClient {
.await?
}
WheelLocation::Url(url) => {
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url)
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities)
.await?
}
}
@ -460,7 +465,9 @@ impl RegistryClient {
self.wheel_metadata_no_pep658(
&wheel.filename,
&wheel.url,
None,
WheelCache::Url(&wheel.url),
capabilities,
)
.await?
}
@ -489,6 +496,7 @@ impl RegistryClient {
index: &IndexUrl,
file: &File,
url: &Url,
capabilities: &IndexCapabilities,
) -> Result<Metadata23, Error> {
// If the metadata file is available at its own url (PEP 658), download it from there.
let filename = WheelFilename::from_str(&file.filename).map_err(ErrorKind::WheelFilename)?;
@ -536,7 +544,13 @@ impl RegistryClient {
// If we lack PEP 658 support, try using HTTP range requests to read only the
// `.dist-info/METADATA` file from the zip, and if that also fails, download the whole wheel
// into the cache and read from there
self.wheel_metadata_no_pep658(&filename, url, WheelCache::Index(index))
self.wheel_metadata_no_pep658(
&filename,
url,
Some(index),
WheelCache::Index(index),
capabilities,
)
.await
}
}
@ -546,7 +560,9 @@ impl RegistryClient {
&self,
filename: &'data WheelFilename,
url: &'data Url,
index: Option<&'data IndexUrl>,
cache_shard: WheelCache<'data>,
capabilities: &'data IndexCapabilities,
) -> Result<Metadata23, Error> {
let cache_entry = self.cache.entry(
CacheBucket::Wheels,
@ -562,6 +578,8 @@ impl RegistryClient {
Connectivity::Offline => CacheControl::AllowStale,
};
// Attempt to fetch via a range request.
if index.map_or(true, |index| capabilities.supports_range_requests(index)) {
let req = self
.uncached_client(url)
.head(url.clone())
@ -623,11 +641,17 @@ impl RegistryClient {
// The range request version failed. Fall back to streaming the file to search
// for the METADATA file.
warn!("Range requests not supported for {filename}; streaming wheel");
// Mark the index as not supporting range requests.
if let Some(index) = index {
capabilities.set_supports_range_requests(index.clone(), false);
}
} else {
return Err(err);
}
}
};
}
// Create a request to stream the file.
let req = self

View file

@ -4,7 +4,7 @@ use anyhow::Result;
use url::Url;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist};
use distribution_types::{BuiltDist, DirectUrlBuiltDist, IndexCapabilities};
use pep508_rs::VerbatimUrl;
use uv_cache::Cache;
use uv_client::RegistryClientBuilder;
@ -24,7 +24,8 @@ async fn remote_metadata_with_and_without_cache() -> Result<()> {
location: Url::parse(url).unwrap(),
url: VerbatimUrl::from_str(url).unwrap(),
});
let metadata = client.wheel_metadata(&dist).await.unwrap();
let capabilities = IndexCapabilities::default();
let metadata = client.wheel_metadata(&dist, &capabilities).await.unwrap();
assert_eq!(metadata.version.to_string(), "4.66.1");
}

View file

@ -5,7 +5,7 @@ use anyhow::{bail, Result};
use clap::Parser;
use distribution_filename::WheelFilename;
use distribution_types::{BuiltDist, DirectUrlBuiltDist, RemoteSource};
use distribution_types::{BuiltDist, DirectUrlBuiltDist, IndexCapabilities, RemoteSource};
use pep508_rs::VerbatimUrl;
use pypi_types::ParsedUrl;
use uv_cache::{Cache, CacheArgs};
@ -21,6 +21,7 @@ pub(crate) struct WheelMetadataArgs {
pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> Result<()> {
let cache = Cache::try_from(args.cache_args)?.init()?;
let client = RegistryClientBuilder::new(cache).build();
let capabilities = IndexCapabilities::default();
let filename = WheelFilename::from_str(&args.url.filename()?)?;
@ -29,11 +30,14 @@ pub(crate) async fn wheel_metadata(args: WheelMetadataArgs) -> Result<()> {
};
let metadata = client
.wheel_metadata(&BuiltDist::DirectUrl(DirectUrlBuiltDist {
.wheel_metadata(
&BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename,
location: archive.url,
url: args.url,
}))
}),
&capabilities,
)
.await?;
println!("{metadata:?}");
Ok(())

View file

@ -11,7 +11,9 @@ use itertools::Itertools;
use rustc_hash::FxHashMap;
use tracing::{debug, instrument};
use distribution_types::{CachedDist, IndexLocations, Name, Resolution, SourceDist};
use distribution_types::{
CachedDist, IndexCapabilities, IndexLocations, Name, Resolution, SourceDist,
};
use pypi_types::Requirement;
use uv_build::{SourceBuild, SourceBuildContext};
use uv_cache::Cache;
@ -42,6 +44,7 @@ pub struct BuildDispatch<'a> {
flat_index: &'a FlatIndex,
index: &'a InMemoryIndex,
git: &'a GitResolver,
capabilities: &'a IndexCapabilities,
in_flight: &'a InFlight,
build_isolation: BuildIsolation<'a>,
link_mode: install_wheel_rs::linker::LinkMode,
@ -65,6 +68,7 @@ impl<'a> BuildDispatch<'a> {
flat_index: &'a FlatIndex,
index: &'a InMemoryIndex,
git: &'a GitResolver,
capabilities: &'a IndexCapabilities,
in_flight: &'a InFlight,
index_strategy: IndexStrategy,
config_settings: &'a ConfigSettings,
@ -85,6 +89,7 @@ impl<'a> BuildDispatch<'a> {
flat_index,
index,
git,
capabilities,
in_flight,
index_strategy,
config_settings,
@ -127,6 +132,10 @@ impl<'a> BuildContext for BuildDispatch<'a> {
self.git
}
fn capabilities(&self) -> &IndexCapabilities {
self.capabilities
}
fn build_options(&self) -> &BuildOptions {
self.build_options
}

View file

@ -373,7 +373,11 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
let result = self
.client
.managed(|client| client.wheel_metadata(dist).boxed_local())
.managed(|client| {
client
.wheel_metadata(dist, self.build_context.capabilities())
.boxed_local()
})
.await;
match result {

View file

@ -6,7 +6,7 @@ use rustc_hash::FxHashMap;
use tokio::sync::mpsc::Sender;
use tracing::{debug, trace};
use distribution_types::{CompatibleDist, DistributionMetadata};
use distribution_types::{CompatibleDist, DistributionMetadata, IndexCapabilities};
use pep440_rs::Version;
use crate::candidate_selector::CandidateSelector;
@ -52,6 +52,7 @@ impl BatchPrefetcher {
python_requirement: &PythonRequirement,
request_sink: &Sender<Request>,
index: &InMemoryIndex,
capabilities: &IndexCapabilities,
selector: &CandidateSelector,
markers: &ResolverMarkers,
) -> anyhow::Result<(), ResolveError> {
@ -135,8 +136,17 @@ impl BatchPrefetcher {
};
// Avoid prefetching source distributions, which could be expensive.
if !dist.prefetchable() {
let Some(wheel) = dist.wheel() else {
continue;
};
// Avoid prefetching built distributions that don't support _either_ PEP 658 (`.metadata`)
// or range requests.
if !(wheel.file.dist_info_metadata
|| capabilities.supports_range_requests(&wheel.index))
{
debug!("Abandoning prefetch for {wheel} due to missing registry capabilities");
return Ok(());
}
// Avoid prefetching for distributions that don't satisfy the Python requirement.

View file

@ -22,8 +22,8 @@ use tracing::{debug, info, instrument, trace, warn, Level};
use distribution_types::{
BuiltDist, CompatibleDist, Dist, DistributionMetadata, IncompatibleDist, IncompatibleSource,
IncompatibleWheel, IndexLocations, InstalledDist, PythonRequirementKind, RemoteSource,
ResolvedDist, ResolvedDistRef, SourceDist, VersionOrUrlRef,
IncompatibleWheel, IndexCapabilities, IndexLocations, InstalledDist, PythonRequirementKind,
RemoteSource, ResolvedDist, ResolvedDistRef, SourceDist, VersionOrUrlRef,
};
pub(crate) use fork_map::{ForkMap, ForkSet};
use locals::Locals;
@ -95,6 +95,7 @@ struct ResolverState<InstalledPackages: InstalledPackagesProvider> {
groups: Groups,
preferences: Preferences,
git: GitResolver,
capabilities: IndexCapabilities,
exclusions: Exclusions,
urls: Urls,
locals: Locals,
@ -169,6 +170,7 @@ impl<'a, Context: BuildContext, InstalledPackages: InstalledPackagesProvider>
python_requirement,
index,
build_context.git(),
build_context.capabilities(),
provider,
installed_packages,
)
@ -187,12 +189,14 @@ impl<Provider: ResolverProvider, InstalledPackages: InstalledPackagesProvider>
python_requirement: &PythonRequirement,
index: &InMemoryIndex,
git: &GitResolver,
capabilities: &IndexCapabilities,
provider: Provider,
installed_packages: InstalledPackages,
) -> Result<Self, ResolveError> {
let state = ResolverState {
index: index.clone(),
git: git.clone(),
capabilities: capabilities.clone(),
selector: CandidateSelector::for_resolution(options, &manifest, &markers),
dependency_mode: options.dependency_mode,
urls: Urls::from_manifest(&manifest, &markers, git, options.dependency_mode)?,
@ -458,6 +462,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
&state.python_requirement,
&request_sink,
&self.index,
&self.capabilities,
&self.selector,
&state.markers,
)?;
@ -1808,7 +1813,7 @@ impl<InstalledPackages: InstalledPackagesProvider> ResolverState<InstalledPackag
// Avoid prefetching source distributions with unbounded lower-bound ranges. This
// often leads to failed attempts to build legacy versions of packages that are
// incompatible with modern build tools.
if !dist.prefetchable() {
if dist.wheel().is_some() {
if !self.selector.use_highest_version(&package_name) {
if let Some((lower, _)) = range.iter().next() {
if lower == &Bound::Unbounded {

View file

@ -61,6 +61,7 @@ pub trait ResolverProvider {
dist: &'io Dist,
) -> impl Future<Output = WheelMetadataResult> + 'io;
/// Returns the [`IndexLocations`] used by this resolver.
fn index_locations(&self) -> &IndexLocations;
/// Set the [`uv_distribution::Reporter`] to use for this installer.

View file

@ -3,7 +3,9 @@ use std::path::{Path, PathBuf};
use anyhow::Result;
use distribution_types::{CachedDist, IndexLocations, InstalledDist, Resolution, SourceDist};
use distribution_types::{
CachedDist, IndexCapabilities, IndexLocations, InstalledDist, Resolution, SourceDist,
};
use pep508_rs::PackageName;
use pypi_types::Requirement;
use uv_cache::Cache;
@ -57,6 +59,9 @@ pub trait BuildContext {
/// Return a reference to the Git resolver.
fn git(&self) -> &GitResolver;
/// Return a reference to the discovered registry capabilities.
fn capabilities(&self) -> &IndexCapabilities;
/// Whether source distribution building or pre-built wheels is disabled.
///
/// This [`BuildContext::setup_build`] calls will fail if builds are disabled.

View file

@ -301,6 +301,7 @@ async fn build_impl(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
config_setting,

View file

@ -11,7 +11,7 @@ pub(crate) use build::build;
pub(crate) use cache_clean::cache_clean;
pub(crate) use cache_dir::cache_dir;
pub(crate) use cache_prune::cache_prune;
use distribution_types::InstalledMetadata;
use distribution_types::{IndexCapabilities, InstalledMetadata};
pub(crate) use help::help;
pub(crate) use pip::check::pip_check;
pub(crate) use pip::compile::pip_compile;
@ -200,6 +200,8 @@ pub(crate) struct SharedState {
pub(crate) index: InMemoryIndex,
/// The downloaded distributions.
pub(crate) in_flight: InFlight,
/// The discovered capabilities for each registry index.
pub(crate) capabilities: IndexCapabilities,
}
/// A multicasting writer that writes to both the standard output and an output file, if present.

View file

@ -8,7 +8,8 @@ use owo_colors::OwoColorize;
use tracing::debug;
use distribution_types::{
IndexLocations, NameRequirementSpecification, UnresolvedRequirementSpecification, Verbatim,
IndexCapabilities, IndexLocations, NameRequirementSpecification,
UnresolvedRequirementSpecification, Verbatim,
};
use install_wheel_rs::linker::LinkMode;
use pypi_types::{Requirement, SupportedEnvironments};
@ -292,6 +293,7 @@ pub(crate) async fn pip_compile(
// Read the lockfile, if present.
let preferences = read_requirements_txt(output_file, &upgrade).await?;
let git = GitResolver::default();
let capabilities = IndexCapabilities::default();
// Combine the `--no-binary` and `--no-build` flags from the requirements files.
let build_options = build_options.combine(no_binary, no_build);
@ -335,6 +337,7 @@ pub(crate) async fn pip_compile(
&flat_index,
&source_index,
&git,
&capabilities,
&in_flight,
index_strategy,
&config_settings,

View file

@ -339,6 +339,7 @@ pub(crate) async fn pip_install(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
config_settings,

View file

@ -289,6 +289,7 @@ pub(crate) async fn pip_sync(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
config_settings,

View file

@ -299,6 +299,7 @@ pub(crate) async fn add(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
settings.index_strategy,
&settings.config_setting,

View file

@ -411,6 +411,7 @@ async fn do_lock(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
config_setting,

View file

@ -519,6 +519,7 @@ pub(crate) async fn resolve_names(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
*index_strategy,
config_setting,
@ -657,6 +658,7 @@ pub(crate) async fn resolve_environment<'a>(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
config_setting,
@ -785,6 +787,7 @@ pub(crate) async fn sync_environment(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
config_setting,
@ -984,6 +987,7 @@ pub(crate) async fn update_environment(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
*index_strategy,
config_setting,

View file

@ -280,6 +280,7 @@ pub(super) async fn do_sync(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
config_setting,

View file

@ -318,6 +318,7 @@ async fn venv_impl(
&flat_index,
&state.index,
&state.git,
&state.capabilities,
&state.in_flight,
index_strategy,
&config_settings,