# Remove lossy resolution-to-requirements conversion in install plan (#7798)

## Summary

This addresses a longstanding piece of technical debt. After resolving, we
have a set of `ResolvedDist` entries. We then convert those to
`Requirement` (a lossy conversion -- we lose information like the index
that the package was resolved from), and then convert them back to `Dist`.
This commit removes the round-trip: the install `Planner` now consumes the
`Resolution` directly and returns the resolved `Dist` entries in its plan.
Charlie Marsh · 2024-09-30 · commit b6ce39f45e · 10 changed files with 175 additions and 347 deletions
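
As a minimal sketch of where the old flow lost data -- using simplified stand-in types, since the real `ResolvedDist`, `Requirement`, and `Dist` carry many more fields -- consider:

```rust
// Stand-in types for illustration only; the real types live in
// `distribution-types` and `pypi_types` and are far richer.
#[derive(Clone, Debug)]
struct ResolvedDist {
    name: String,
    version: String,
    index: Option<String>, // e.g. the index the package was resolved from
}

#[derive(Clone, Debug)]
struct Requirement {
    name: String,
    specifier: String, // e.g. "==4.4.0"; there is no `index` field here
}

impl From<&ResolvedDist> for Requirement {
    fn from(dist: &ResolvedDist) -> Self {
        // Lossy: `dist.index` is silently dropped.
        Requirement {
            name: dist.name.clone(),
            specifier: format!("=={}", dist.version),
        }
    }
}

fn main() {
    let resolved = ResolvedDist {
        name: "anyio".to_string(),
        version: "4.4.0".to_string(),
        index: Some("https://pypi.org/simple".to_string()),
    };

    // Old flow: ResolvedDist -> Requirement -> Dist. Recovering the `Dist`
    // from the lossy `Requirement` required a second lookup in the resolution.
    let requirement = Requirement::from(&resolved);
    assert_eq!(requirement.specifier, "==4.4.0");

    // New flow: the planner receives the `Resolution` and uses the resolved
    // distribution as-is, index and all.
    println!("kept: {resolved:?}\nlossy: {requirement:?}");
}
```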

Cargo.lock

@@ -4888,7 +4888,6 @@ dependencies = [
"anyhow",
"async-channel",
"cache-key",
"distribution-filename",
"distribution-types",
"fs-err",
"futures",
@@ -4911,7 +4910,6 @@ dependencies = [
"uv-distribution",
"uv-extract",
"uv-fs",
"uv-git",
"uv-normalize",
"uv-python",
"uv-types",


@@ -195,14 +195,8 @@ impl From<Dist> for ResolvedDist {
}
}
impl From<InstalledDist> for ResolvedDist {
fn from(value: InstalledDist) -> Self {
ResolvedDist::Installed(value)
}
}
impl Display for ResolvedDist {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Installed(dist) => dist.fmt(f),
Self::Installable(dist) => dist.fmt(f),

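For orientation, here is a stand-in for the two-variant shape of `ResolvedDist` that the `Display` impl and `From` conversions above operate on; the real variants wrap `InstalledDist` and `Dist` rather than the strings used in this sketch:

```rust
use std::fmt::{self, Display, Formatter};

// Stand-in enum: the real variants wrap `InstalledDist` and `Dist`.
#[derive(Debug)]
enum ResolvedDist {
    Installed(String),
    Installable(String),
}

// Stand-in for the `From<InstalledDist>` conversion shown in the hunk above.
impl From<String> for ResolvedDist {
    fn from(value: String) -> Self {
        ResolvedDist::Installed(value)
    }
}

impl Display for ResolvedDist {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Self::Installed(dist) => dist.fmt(f),
            Self::Installable(dist) => dist.fmt(f),
        }
    }
}

fn main() {
    println!("{}", ResolvedDist::from("anyio 4.4.0".to_string()));
}
```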

@@ -213,19 +213,16 @@ impl<'a> BuildContext for BuildDispatch<'a> {
// Determine the current environment markers.
let tags = self.interpreter.tags()?;
let markers = self.interpreter.resolver_markers();
// Determine the set of installed packages.
let site_packages = SitePackages::from_environment(venv)?;
let requirements = resolution.requirements().collect::<Vec<_>>();
let Plan {
cached,
remote,
reinstalls,
extraneous: _,
} = Planner::new(&requirements).build(
} = Planner::new(resolution).build(
site_packages,
&Reinstall::default(),
&BuildOptions::default(),
@@ -234,7 +231,6 @@ impl<'a> BuildContext for BuildDispatch<'a> {
self.config_settings,
self.cache(),
venv,
&markers,
tags,
)?;
@@ -244,17 +240,6 @@ impl<'a> BuildContext for BuildDispatch<'a> {
return Ok(vec![]);
}
// Resolve any registry-based requirements.
let remote = remote
.iter()
.map(|dist| {
resolution
.get_remote(&dist.name)
.cloned()
.expect("Resolution should contain all packages")
})
.collect::<Vec<_>>();
// Download any missing distributions.
let wheels = if remote.is_empty() {
vec![]

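The block deleted above was the second half of the round-trip: every entry in the plan's `remote` list had to be mapped back to its resolved distribution by name, panicking if the lookup failed. A stand-in sketch of that pattern (simplified types, not uv's real API), which the `Vec<Dist>`-returning plan makes unnecessary:

```rust
// Stand-in types for illustration only.
#[derive(Clone, Debug)]
struct Dist {
    name: String,
}

struct Resolution {
    dists: Vec<Dist>,
}

impl Resolution {
    // Stand-in for `Resolution::get_remote`, a name-based lookup.
    fn get_remote(&self, name: &str) -> Option<&Dist> {
        self.dists.iter().find(|dist| dist.name == name)
    }
}

// Before: `plan.remote` held lossy requirements, so each name was re-resolved
// against the resolution, with a panic path if it was somehow missing.
fn remote_before(resolution: &Resolution, names: &[String]) -> Vec<Dist> {
    names
        .iter()
        .map(|name| {
            resolution
                .get_remote(name)
                .cloned()
                .expect("Resolution should contain all packages")
        })
        .collect()
}

fn main() {
    let resolution = Resolution {
        dists: vec![Dist { name: "anyio".to_string() }],
    };
    let remote = remote_before(&resolution, &["anyio".to_string()]);
    // After this change, `plan.remote` is already a `Vec<Dist>`, so callers
    // skip the lookup entirely.
    assert_eq!(remote.len(), 1);
}
```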

@@ -14,7 +14,6 @@ workspace = true
[dependencies]
cache-key = { workspace = true }
distribution-filename = { workspace = true }
distribution-types = { workspace = true }
install-wheel-rs = { workspace = true, default-features = false }
pep440_rs = { workspace = true }
@@ -27,7 +26,6 @@ uv-configuration = { workspace = true }
uv-distribution = { workspace = true }
uv-extract = { workspace = true }
uv-fs = { workspace = true }
uv-git = { workspace = true }
uv-python = { workspace = true }
uv-normalize = { workspace = true }
uv-types = { workspace = true }


@@ -1,26 +1,19 @@
use std::collections::hash_map::Entry;
use std::str::FromStr;
use anyhow::{bail, Result};
use rustc_hash::{FxBuildHasher, FxHashMap};
use tracing::debug;
use distribution_filename::{DistExtension, WheelFilename};
use distribution_types::{
CachedDirectUrlDist, CachedDist, DirectUrlBuiltDist, DirectUrlSourceDist, DirectorySourceDist,
Error, GitSourceDist, Hashed, IndexLocations, InstalledDist, Name, PathBuiltDist,
PathSourceDist, RemoteSource, Verbatim,
BuiltDist, CachedDirectUrlDist, CachedDist, Dist, Error, Hashed, IndexLocations, InstalledDist,
Name, Resolution, ResolvedDist, SourceDist,
};
use platform_tags::Tags;
use pypi_types::{Requirement, RequirementSource, ResolverMarkerEnvironment};
use pypi_types::Requirement;
use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_cache_info::{CacheInfo, Timestamp};
use uv_configuration::{BuildOptions, ConfigSettings, Reinstall};
use uv_distribution::{
BuiltWheelIndex, HttpArchivePointer, LocalArchivePointer, RegistryWheelIndex,
};
use uv_fs::{normalize_absolute_path, Simplified};
use uv_git::GitUrl;
use uv_fs::Simplified;
use uv_python::PythonEnvironment;
use uv_types::HashStrategy;
@@ -30,13 +23,13 @@ use crate::SitePackages;
/// A planner to generate a [`Plan`] based on a set of requirements.
#[derive(Debug)]
pub struct Planner<'a> {
requirements: &'a [Requirement],
resolution: &'a Resolution,
}
impl<'a> Planner<'a> {
/// Set the requirements to use in the [`Plan`].
pub fn new(requirements: &'a [Requirement]) -> Self {
Self { requirements }
pub fn new(resolution: &'a Resolution) -> Self {
Self { resolution }
}
/// Partition a set of requirements into those that should be linked from the cache, those that
@@ -60,7 +53,6 @@ impl<'a> Planner<'a> {
config_settings: &ConfigSettings,
cache: &Cache,
venv: &PythonEnvironment,
markers: &ResolverMarkerEnvironment,
tags: &Tags,
) -> Result<Plan> {
// Index all the already-downloaded wheels in the cache.
@@ -71,43 +63,23 @@ impl<'a> Planner<'a> {
let mut remote = vec![];
let mut reinstalls = vec![];
let mut extraneous = vec![];
let mut seen = FxHashMap::with_capacity_and_hasher(self.requirements.len(), FxBuildHasher);
for requirement in self.requirements {
// Filter out incompatible requirements.
if !requirement.evaluate_markers(Some(markers), &[]) {
continue;
}
// If we see the same requirement twice, then we have a conflict.
match seen.entry(requirement.name.clone()) {
Entry::Occupied(value) => {
if value.get() == &&requirement.source {
continue;
}
bail!(
"Detected duplicate package in requirements: {}",
requirement.name
);
}
Entry::Vacant(entry) => {
entry.insert(&requirement.source);
}
}
for dist in self.resolution.distributions() {
// Check if the package should be reinstalled.
let reinstall = match reinstall {
Reinstall::None => false,
Reinstall::All => true,
Reinstall::Packages(packages) => packages.contains(&requirement.name),
Reinstall::Packages(packages) => packages.contains(dist.name()),
};
// Check if installation of a binary version of the package should be allowed.
let no_binary = build_options.no_binary_package(&requirement.name);
let no_build = build_options.no_build_package(&requirement.name);
let no_binary = build_options.no_binary_package(dist.name());
let no_build = build_options.no_build_package(dist.name());
let requirement = Requirement::from(dist);
// Determine whether the distribution is already installed.
let installed_dists = site_packages.remove_packages(&requirement.name);
if reinstall {
reinstalls.extend(installed_dists);
} else {
@@ -135,290 +107,204 @@ impl<'a> Planner<'a> {
}
}
if cache.must_revalidate(&requirement.name) {
debug!("Must revalidate requirement: {}", requirement.name);
remote.push(requirement.clone());
let ResolvedDist::Installable(installable) = dist else {
unreachable!("Installed distribution could not be found in site-packages: {dist}");
};
if cache.must_revalidate(dist.name()) {
debug!("Must revalidate requirement: {}", dist.name());
remote.push(installable.clone());
continue;
}
// Identify any cached distributions that satisfy the requirement.
match &requirement.source {
RequirementSource::Registry { specifier, .. } => {
if let Some(distribution) =
registry_index.get(&requirement.name).find_map(|entry| {
if !specifier.contains(&entry.dist.filename.version) {
return None;
};
if entry.built && no_build {
return None;
}
if !entry.built && no_binary {
return None;
}
Some(&entry.dist)
})
{
match installable {
Dist::Built(BuiltDist::Registry(wheel)) => {
if let Some(distribution) = registry_index.get(wheel.name()).find_map(|entry| {
if entry.dist.filename.version != wheel.best_wheel().filename.version {
return None;
};
if entry.built && no_build {
return None;
}
if !entry.built && no_binary {
return None;
}
Some(&entry.dist)
}) {
debug!("Requirement already cached: {distribution}");
cached.push(CachedDist::Registry(distribution.clone()));
continue;
}
}
RequirementSource::Url {
location,
subdirectory,
ext,
url,
} => {
match ext {
DistExtension::Wheel => {
// Validate that the name in the wheel matches that of the requirement.
let filename = WheelFilename::from_str(&url.filename()?)?;
if filename.name != requirement.name {
return Err(Error::PackageNameMismatch(
requirement.name.clone(),
filename.name,
url.verbatim().to_string(),
)
.into());
}
Dist::Built(BuiltDist::DirectUrl(wheel)) => {
if !wheel.filename.is_compatible(tags) {
bail!(
"A URL dependency is incompatible with the current platform: {}",
wheel.url
);
}
let wheel = DirectUrlBuiltDist {
filename,
location: location.clone(),
url: url.clone(),
};
if no_binary {
bail!(
"A URL dependency points to a wheel which conflicts with `--no-binary`: {}",
wheel.url
);
}
if !wheel.filename.is_compatible(tags) {
bail!(
"A URL dependency is incompatible with the current platform: {}",
wheel.url
// Find the exact wheel from the cache, since we know the filename in
// advance.
let cache_entry = cache
.shard(
CacheBucket::Wheels,
WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
)
.entry(format!("{}.http", wheel.filename.stem()));
// Read the HTTP pointer.
if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? {
let archive = pointer.into_archive();
if archive.satisfies(hasher.get(dist)) {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename.clone(),
wheel.url.clone(),
archive.hashes,
CacheInfo::default(),
cache.archive(&archive.id),
);
}
if no_binary {
bail!(
"A URL dependency points to a wheel which conflicts with `--no-binary`: {}",
wheel.url
);
}
// Find the exact wheel from the cache, since we know the filename in
// advance.
let cache_entry = cache
.shard(
CacheBucket::Wheels,
WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
)
.entry(format!("{}.http", wheel.filename.stem()));
// Read the HTTP pointer.
if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? {
let archive = pointer.into_archive();
if archive.satisfies(hasher.get(&wheel)) {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
archive.hashes,
CacheInfo::default(),
cache.archive(&archive.id),
);
debug!("URL wheel requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
debug!("URL wheel requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
DistExtension::Source(ext) => {
let sdist = DirectUrlSourceDist {
name: requirement.name.clone(),
location: location.clone(),
subdirectory: subdirectory.clone(),
ext: *ext,
url: url.clone(),
};
// Find the most-compatible wheel from the cache, since we don't know
// the filename in advance.
if let Some(wheel) = built_index.url(&sdist)? {
let cached_dist = wheel.into_url_dist(url.clone());
debug!("URL source requirement already cached: {cached_dist}");
}
}
Dist::Built(BuiltDist::Path(wheel)) => {
// Validate that the path exists.
if !wheel.install_path.exists() {
return Err(Error::NotFound(wheel.url.to_url()).into());
}
if !wheel.filename.is_compatible(tags) {
bail!(
"A path dependency is incompatible with the current platform: {}",
wheel.install_path.user_display()
);
}
if no_binary {
bail!(
"A path dependency points to a wheel which conflicts with `--no-binary`: {}",
wheel.url
);
}
// Find the exact wheel from the cache, since we know the filename in
// advance.
let cache_entry = cache
.shard(
CacheBucket::Wheels,
WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
)
.entry(format!("{}.rev", wheel.filename.stem()));
if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? {
let timestamp = Timestamp::from_path(&wheel.install_path)?;
if pointer.is_up_to_date(timestamp) {
let cache_info = pointer.to_cache_info();
let archive = pointer.into_archive();
if archive.satisfies(hasher.get(dist)) {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename.clone(),
wheel.url.clone(),
archive.hashes,
cache_info,
cache.archive(&archive.id),
);
debug!("Path wheel requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
}
}
RequirementSource::Git {
repository,
reference,
precise,
subdirectory,
url,
} => {
let git = if let Some(precise) = precise {
GitUrl::from_commit(repository.clone(), reference.clone(), *precise)
} else {
GitUrl::from_reference(repository.clone(), reference.clone())
};
let sdist = GitSourceDist {
name: requirement.name.clone(),
git: Box::new(git),
subdirectory: subdirectory.clone(),
url: url.clone(),
};
Dist::Source(SourceDist::Registry(sdist)) => {
if let Some(distribution) = registry_index.get(sdist.name()).find_map(|entry| {
if entry.dist.filename.version != sdist.version {
return None;
};
if entry.built && no_build {
return None;
}
if !entry.built && no_binary {
return None;
}
Some(&entry.dist)
}) {
debug!("Requirement already cached: {distribution}");
cached.push(CachedDist::Registry(distribution.clone()));
continue;
}
}
Dist::Source(SourceDist::DirectUrl(sdist)) => {
// Find the most-compatible wheel from the cache, since we don't know
// the filename in advance.
if let Some(wheel) = built_index.git(&sdist) {
let cached_dist = wheel.into_url_dist(url.clone());
if let Some(wheel) = built_index.url(sdist)? {
let cached_dist = wheel.into_url_dist(sdist.url.clone());
debug!("URL source requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
Dist::Source(SourceDist::Git(sdist)) => {
// Find the most-compatible wheel from the cache, since we don't know
// the filename in advance.
if let Some(wheel) = built_index.git(sdist) {
let cached_dist = wheel.into_url_dist(sdist.url.clone());
debug!("Git source requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
RequirementSource::Directory {
r#virtual,
url,
editable,
install_path,
} => {
// Convert to an absolute path.
let install_path = std::path::absolute(install_path)?;
// Normalize the path.
let install_path = normalize_absolute_path(&install_path)?;
Dist::Source(SourceDist::Path(sdist)) => {
// Validate that the path exists.
if !install_path.exists() {
return Err(Error::NotFound(url.to_url()).into());
if !sdist.install_path.exists() {
return Err(Error::NotFound(sdist.url.to_url()).into());
}
let sdist = DirectorySourceDist {
name: requirement.name.clone(),
url: url.clone(),
install_path,
editable: *editable,
r#virtual: *r#virtual,
};
// Find the most-compatible wheel from the cache, since we don't know
// the filename in advance.
if let Some(wheel) = built_index.directory(&sdist)? {
let cached_dist = if *editable {
wheel.into_editable(url.clone())
if let Some(wheel) = built_index.path(sdist)? {
let cached_dist = wheel.into_url_dist(sdist.url.clone());
debug!("Path source requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
Dist::Source(SourceDist::Directory(sdist)) => {
// Validate that the path exists.
if !sdist.install_path.exists() {
return Err(Error::NotFound(sdist.url.to_url()).into());
}
// Find the most-compatible wheel from the cache, since we don't know
// the filename in advance.
if let Some(wheel) = built_index.directory(sdist)? {
let cached_dist = if sdist.editable {
wheel.into_editable(sdist.url.clone())
} else {
wheel.into_url_dist(url.clone())
wheel.into_url_dist(sdist.url.clone())
};
debug!("Directory source requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
RequirementSource::Path {
ext,
url,
install_path,
} => {
// Convert to an absolute path.
let install_path = std::path::absolute(install_path)?;
// Normalize the path.
let install_path = normalize_absolute_path(&install_path)?;
// Validate that the path exists.
if !install_path.exists() {
return Err(Error::NotFound(url.to_url()).into());
}
match ext {
DistExtension::Wheel => {
// Validate that the name in the wheel matches that of the requirement.
let filename = WheelFilename::from_str(&url.filename()?)?;
if filename.name != requirement.name {
return Err(Error::PackageNameMismatch(
requirement.name.clone(),
filename.name,
url.verbatim().to_string(),
)
.into());
}
let wheel = PathBuiltDist {
filename,
url: url.clone(),
install_path,
};
if !wheel.filename.is_compatible(tags) {
bail!(
"A path dependency is incompatible with the current platform: {}",
wheel.install_path.user_display()
);
}
if no_binary {
bail!(
"A path dependency points to a wheel which conflicts with `--no-binary`: {}",
wheel.url
);
}
// Find the exact wheel from the cache, since we know the filename in
// advance.
let cache_entry = cache
.shard(
CacheBucket::Wheels,
WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()),
)
.entry(format!("{}.rev", wheel.filename.stem()));
if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? {
let timestamp = Timestamp::from_path(&wheel.install_path)?;
if pointer.is_up_to_date(timestamp) {
let cache_info = pointer.to_cache_info();
let archive = pointer.into_archive();
if archive.satisfies(hasher.get(&wheel)) {
let cached_dist = CachedDirectUrlDist::from_url(
wheel.filename,
wheel.url,
archive.hashes,
cache_info,
cache.archive(&archive.id),
);
debug!(
"Path wheel requirement already cached: {cached_dist}"
);
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
}
}
DistExtension::Source(ext) => {
let sdist = PathSourceDist {
name: requirement.name.clone(),
url: url.clone(),
install_path,
ext: *ext,
};
// Find the most-compatible wheel from the cache, since we don't know
// the filename in advance.
if let Some(wheel) = built_index.path(&sdist)? {
let cached_dist = wheel.into_url_dist(url.clone());
debug!("Path source requirement already cached: {cached_dist}");
cached.push(CachedDist::Url(cached_dist));
continue;
}
}
}
}
}
debug!("Identified uncached requirement: {requirement}");
remote.push(requirement.clone());
debug!("Identified uncached distribution: {installable}");
remote.push(installable.clone());
}
// Remove any unnecessary packages.
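
One subtle behavioral shift in the registry branches above: a cached wheel used to be reused whenever its version satisfied the requirement's specifier, whereas now it must carry the exact version the resolver selected. A sketch of the difference using the `pep440_rs` crate (which this codebase already depends on; the exact API surface here is an assumption):

```rust
use std::str::FromStr;

use pep440_rs::{Version, VersionSpecifiers};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cached = Version::from_str("1.2.9")?;
    let resolved = Version::from_str("1.3.0")?;
    let specifier = VersionSpecifiers::from_str(">=1.2")?;

    // Before: any cached wheel whose version satisfied the specifier was
    // reused, even if the resolver had settled on a different version.
    let old_match = specifier.contains(&cached);

    // After: the cached wheel must equal the version the resolver selected.
    let new_match = cached == resolved;

    assert!(old_match);
    assert!(!new_match);
    Ok(())
}
```
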
@@ -459,7 +345,7 @@ pub struct Plan {
/// The distributions that are not already installed in the current environment, and are
/// not available in the local cache.
pub remote: Vec<Requirement>,
pub remote: Vec<Dist>,
/// Any distributions that are already installed in the current environment, but will be
/// re-installed (including upgraded) to satisfy the requirements.


@@ -413,7 +413,6 @@ pub(crate) async fn pip_install(
&index_locations,
config_settings,
&hasher,
&markers,
&tags,
&client,
&state.in_flight,


@@ -391,7 +391,6 @@ pub(crate) async fn install(
index_urls: &IndexLocations,
config_settings: &ConfigSettings,
hasher: &HashStrategy,
markers: &ResolverMarkerEnvironment,
tags: &Tags,
client: &RegistryClient,
in_flight: &InFlight,
@@ -405,12 +404,9 @@ pub(crate) async fn install(
) -> Result<Changelog, Error> {
let start = std::time::Instant::now();
// Extract the requirements from the resolution.
let requirements = resolution.requirements().collect::<Vec<_>>();
// Partition into those that should be linked from the cache (`local`), those that need to be
// downloaded (`remote`), and those that should be removed (`extraneous`).
let plan = Planner::new(&requirements)
let plan = Planner::new(resolution)
.build(
site_packages,
reinstall,
@@ -420,7 +416,6 @@ pub(crate) async fn install(
config_settings,
cache,
venv,
markers,
tags,
)
.context("Failed to determine installation plan")?;
@@ -449,17 +444,6 @@ pub(crate) async fn install(
return Ok(Changelog::default());
}
// Map any registry-based requirements back to those returned by the resolver.
let remote = remote
.iter()
.map(|dist| {
resolution
.get_remote(&dist.name)
.cloned()
.expect("Resolution should contain all packages")
})
.collect::<Vec<_>>();
// Download, build, and unzip any missing distributions.
let wheels = if remote.is_empty() {
vec![]
@@ -582,17 +566,6 @@ fn report_dry_run(
return Ok(());
}
// Map any registry-based requirements back to those returned by the resolver.
let remote = remote
.iter()
.map(|dist| {
resolution
.get_remote(&dist.name)
.cloned()
.expect("Resolution should contain all packages")
})
.collect::<Vec<_>>();
// Download, build, and unzip any missing distributions.
let wheels = if remote.is_empty() {
vec![]


@@ -364,7 +364,6 @@ pub(crate) async fn pip_sync(
&index_locations,
config_settings,
&hasher,
&markers,
&tags,
&client,
&state.in_flight,


@@ -928,7 +928,6 @@ pub(crate) async fn sync_environment(
// Determine the markers and tags to use for resolution.
let interpreter = venv.interpreter();
let tags = venv.interpreter().tags()?;
let markers = interpreter.resolver_markers();
// Add all authenticated sources to the cache.
for url in index_locations.urls() {
@@ -1006,7 +1005,6 @@ pub(crate) async fn sync_environment(
index_locations,
config_setting,
&hasher,
&markers,
tags,
&client,
&state.in_flight,
@@ -1242,7 +1240,6 @@ pub(crate) async fn update_environment(
index_locations,
config_setting,
&hasher,
&markers,
tags,
&client,
&state.in_flight,


@@ -336,7 +336,6 @@ pub(super) async fn do_sync(
index_locations,
config_setting,
&hasher,
&markers,
tags,
&client,
&state.in_flight,
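
Finally, a recurring edit across the call sites above is dropping the `markers` argument. The old planner re-filtered each requirement against the environment's markers; a `Resolution` only contains distributions that already apply to the target environment, so the re-check is redundant. A toy sketch of the filtering that disappeared (hand-rolled marker matching for illustration, not uv's real PEP 508 evaluation):

```rust
// Stand-in marker evaluation; the real code called
// `requirement.evaluate_markers(Some(markers), &[])` with PEP 508 markers.
struct MarkerEnvironment {
    sys_platform: String,
}

struct Requirement {
    name: String,
    marker: Option<String>, // e.g. `sys_platform == 'win32'`
}

fn evaluate_markers(req: &Requirement, env: &MarkerEnvironment) -> bool {
    match req.marker.as_deref() {
        None => true,
        Some(marker) => marker == format!("sys_platform == '{}'", env.sys_platform),
    }
}

fn main() {
    let env = MarkerEnvironment { sys_platform: "linux".to_string() };
    let reqs = vec![
        Requirement { name: "anyio".to_string(), marker: None },
        Requirement {
            name: "colorama".to_string(),
            marker: Some("sys_platform == 'win32'".to_string()),
        },
    ];

    // Old planner: skip requirements whose markers don't match the target.
    let applicable: Vec<_> = reqs.iter().filter(|r| evaluate_markers(r, &env)).collect();
    assert_eq!(applicable.len(), 1);
    assert_eq!(applicable[0].name, "anyio");
    // New planner: the resolution only contains applicable distributions,
    // so no `markers` parameter is needed at all.
}
```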