From bd01fb490e209a24a0ab24815892cb9c9235407f Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 19 Oct 2023 00:14:20 -0400 Subject: [PATCH] Remove packages when syncing (#135) `pip-sync` will now uninstall any packages that aren't necessary. Closes https://github.com/astral-sh/puffin/issues/128. --- crates/puffin-cli/src/commands/mod.rs | 2 +- crates/puffin-cli/src/commands/pip_sync.rs | 266 +++++++++++------- .../puffin-cli/src/commands/pip_uninstall.rs | 24 +- crates/puffin-cli/src/main.rs | 9 - crates/puffin-installer/src/uninstall.rs | 6 +- crates/puffin-interpreter/src/lib.rs | 2 +- .../puffin-interpreter/src/site_packages.rs | 30 +- scripts/benchmarks/sync.sh | 8 +- 8 files changed, 215 insertions(+), 132 deletions(-) diff --git a/crates/puffin-cli/src/commands/mod.rs b/crates/puffin-cli/src/commands/mod.rs index 0ce96571a..073a09b98 100644 --- a/crates/puffin-cli/src/commands/mod.rs +++ b/crates/puffin-cli/src/commands/mod.rs @@ -5,7 +5,7 @@ pub(crate) use add::add; pub(crate) use clean::clean; pub(crate) use freeze::freeze; pub(crate) use pip_compile::pip_compile; -pub(crate) use pip_sync::{pip_sync, PipSyncFlags}; +pub(crate) use pip_sync::pip_sync; pub(crate) use pip_uninstall::pip_uninstall; pub(crate) use remove::remove; pub(crate) use venv::venv; diff --git a/crates/puffin-cli/src/commands/pip_sync.rs b/crates/puffin-cli/src/commands/pip_sync.rs index 7b7b0e29a..0db4eb37d 100644 --- a/crates/puffin-cli/src/commands/pip_sync.rs +++ b/crates/puffin-cli/src/commands/pip_sync.rs @@ -2,8 +2,8 @@ use std::fmt::Write; use std::path::Path; use anyhow::{bail, Context, Result}; -use bitflags::bitflags; -use itertools::{Either, Itertools}; + +use itertools::Itertools; use owo_colors::OwoColorize; use pep508_rs::Requirement; use tracing::debug; @@ -12,7 +12,7 @@ use platform_host::Platform; use platform_tags::Tags; use puffin_client::PypiClientBuilder; use puffin_installer::{LocalDistribution, LocalIndex, RemoteDistribution}; -use puffin_interpreter::{PythonExecutable, SitePackages}; +use puffin_interpreter::{Distribution, PythonExecutable, SitePackages}; use puffin_package::package_name::PackageName; use puffin_package::requirements_txt::RequirementsTxt; use puffin_resolver::Resolution; @@ -23,19 +23,10 @@ use crate::commands::reporters::{ use crate::commands::{elapsed, ExitStatus}; use crate::printer::Printer; -bitflags! { - #[derive(Debug, Copy, Clone, Default)] - pub struct PipSyncFlags: u8 { - /// Ignore any installed packages, forcing a re-installation. - const IGNORE_INSTALLED = 1 << 0; - } -} - /// Install a set of locked requirements into the current Python environment. pub(crate) async fn pip_sync( src: &Path, cache: Option<&Path>, - flags: PipSyncFlags, mut printer: Printer, ) -> Result { // Read the `requirements.txt` from disk. @@ -53,14 +44,13 @@ pub(crate) async fn pip_sync( return Ok(ExitStatus::Success); } - sync_requirements(&requirements, cache, flags, printer).await + sync_requirements(&requirements, cache, printer).await } /// Install a set of locked requirements into the current Python environment. pub(crate) async fn sync_requirements( requirements: &[Requirement], cache: Option<&Path>, - flags: PipSyncFlags, mut printer: Printer, ) -> Result { // Audit the requirements. @@ -74,15 +64,16 @@ pub(crate) async fn sync_requirements( python.executable().display() ); - // Determine the current environment markers. - let tags = Tags::from_env(python.platform(), python.simple_version())?; - - // Filter out any already-installed or already-cached packages. - let (cached, uncached) = - find_uncached_requirements(requirements, cache, flags, &python).await?; + // Partition into those that should be linked from the cache (`local`), those that need to be + // downloaded (`remote`), and those that should be removed (`extraneous`). + let PartitionedRequirements { + local, + remote, + extraneous, + } = PartitionedRequirements::try_from_requirements(requirements, cache, &python).await?; // Nothing to do. - if uncached.is_empty() && cached.is_empty() { + if remote.is_empty() && local.is_empty() && extraneous.is_empty() { let s = if requirements.len() == 1 { "" } else { "s" }; writeln!( printer, @@ -98,17 +89,19 @@ pub(crate) async fn sync_requirements( return Ok(ExitStatus::Success); } + // Determine the current environment markers. + let tags = Tags::from_env(python.platform(), python.simple_version())?; let client = PypiClientBuilder::default().cache(cache).build(); // Resolve the dependencies. - let resolution = if uncached.is_empty() { + let resolution = if remote.is_empty() { Resolution::default() } else { let start = std::time::Instant::now(); let wheel_finder = puffin_resolver::WheelFinder::new(&tags, &client) - .with_reporter(WheelFinderReporter::from(printer).with_length(uncached.len() as u64)); - let resolution = wheel_finder.resolve(&uncached).await?; + .with_reporter(WheelFinderReporter::from(printer).with_length(remote.len() as u64)); + let resolution = wheel_finder.resolve(&remote).await?; let s = if resolution.len() == 1 { "" } else { "s" }; writeln!( @@ -125,13 +118,12 @@ pub(crate) async fn sync_requirements( resolution }; + // Download any missing distributions. + let staging = tempfile::tempdir()?; let uncached = resolution .into_files() .map(RemoteDistribution::from_file) .collect::>>()?; - let staging = tempfile::tempdir()?; - - // Download any missing distributions. let downloads = if uncached.is_empty() { vec![] } else { @@ -188,89 +180,147 @@ pub(crate) async fn sync_requirements( unzips }; - // Install the resolved distributions. - let start = std::time::Instant::now(); - let wheels = unzips.into_iter().chain(cached).collect::>(); - puffin_installer::Installer::new(&python) - .with_reporter(InstallReporter::from(printer).with_length(wheels.len() as u64)) - .install(&wheels)?; + // Remove any unnecessary packages. + if !extraneous.is_empty() { + let start = std::time::Instant::now(); - let s = if wheels.len() == 1 { "" } else { "s" }; - writeln!( - printer, - "{}", - format!( - "Installed {} in {}", - format!("{} package{}", wheels.len(), s).bold(), - elapsed(start.elapsed()) - ) - .dimmed() - )?; + for dist_info in &extraneous { + let summary = puffin_installer::uninstall(dist_info).await?; + debug!( + "Uninstalled {} ({} file{}, {} director{})", + dist_info.name(), + summary.file_count, + if summary.file_count == 1 { "" } else { "s" }, + summary.dir_count, + if summary.dir_count == 1 { "y" } else { "ies" }, + ); + } - for wheel in wheels { + let s = if extraneous.len() == 1 { "" } else { "s" }; writeln!( printer, - " {} {}{}", - "+".green(), - wheel.name().as_ref().white().bold(), - format!("@{}", wheel.version()).dimmed() + "{}", + format!( + "Uninstalled {} in {}", + format!("{} package{}", extraneous.len(), s).bold(), + elapsed(start.elapsed()) + ) + .dimmed() )?; } + // Install the resolved distributions. + let wheels = unzips.into_iter().chain(local).collect::>(); + if !wheels.is_empty() { + let start = std::time::Instant::now(); + puffin_installer::Installer::new(&python) + .with_reporter(InstallReporter::from(printer).with_length(wheels.len() as u64)) + .install(&wheels)?; + + let s = if wheels.len() == 1 { "" } else { "s" }; + writeln!( + printer, + "{}", + format!( + "Installed {} in {}", + format!("{} package{}", wheels.len(), s).bold(), + elapsed(start.elapsed()) + ) + .dimmed() + )?; + } + + for dist in extraneous + .iter() + .map(|dist_info| PackageModification { + name: dist_info.name(), + version: dist_info.version(), + modification: Modification::Remove, + }) + .chain(wheels.iter().map(|dist_info| PackageModification { + name: dist_info.name(), + version: dist_info.version(), + modification: Modification::Add, + })) + .sorted_unstable_by_key(|modification| modification.name) + { + match dist.modification { + Modification::Add => { + writeln!( + printer, + " {} {}{}", + "+".green(), + dist.name.as_ref().white().bold(), + format!("@{}", dist.version).dimmed() + )?; + } + Modification::Remove => { + writeln!( + printer, + " {} {}{}", + "-".red(), + dist.name.as_ref().white().bold(), + format!("@{}", dist.version).dimmed() + )?; + } + } + } + Ok(ExitStatus::Success) } -async fn find_uncached_requirements( - requirements: &[Requirement], - cache: Option<&Path>, - flags: PipSyncFlags, - python: &PythonExecutable, -) -> Result<(Vec, Vec)> { - // Index all the already-installed packages in site-packages. - let site_packages = if flags.intersects(PipSyncFlags::IGNORE_INSTALLED) { - SitePackages::default() - } else { - SitePackages::from_executable(python).await? - }; +#[derive(Debug, Default)] +struct PartitionedRequirements { + /// The distributions that are not already installed in the current environment, but are + /// available in the local cache. + local: Vec, - // Index all the already-downloaded wheels in the cache. - let local_index = if let Some(cache) = cache { - LocalIndex::from_directory(cache).await? - } else { - LocalIndex::default() - }; + /// The distributions that are not already installed in the current environment, and are + /// not available in the local cache. + remote: Vec, - Ok(split_uncached_requirements( - requirements, - &site_packages, - &local_index, - )) + /// The distributions that are already installed in the current environment, and are + /// _not_ necessary to satisfy the requirements. + extraneous: Vec, } -fn split_uncached_requirements( - requirements: &[Requirement], - site_packages: &SitePackages, - local_index: &LocalIndex, -) -> (Vec, Vec) { - requirements - .iter() - .filter(|requirement| { +impl PartitionedRequirements { + /// Partition a set of requirements into those that should be linked from the cache, those that + /// need to be downloaded, and those that should be removed. + pub(crate) async fn try_from_requirements( + requirements: &[Requirement], + cache: Option<&Path>, + python: &PythonExecutable, + ) -> Result { + // Index all the already-installed packages in site-packages. + let mut site_packages = SitePackages::from_executable(python).await?; + + // Index all the already-downloaded wheels in the cache. + let local_index = if let Some(cache) = cache { + LocalIndex::from_directory(cache).await? + } else { + LocalIndex::default() + }; + + let mut local = vec![]; + let mut remote = vec![]; + let mut extraneous = vec![]; + + for requirement in requirements { let package = PackageName::normalize(&requirement.name); // Filter out already-installed packages. - if let Some(dist_info) = site_packages.get(&package) { - debug!( - "Requirement already satisfied: {} ({})", - package, - dist_info.version() - ); - false - } else { - true + if let Some(dist) = site_packages.remove(&package) { + if requirement.is_satisfied_by(dist.version()) { + debug!( + "Requirement already satisfied: {} ({})", + package, + dist.version() + ); + continue; + } + extraneous.push(dist); } - }) - .partition_map(|requirement| { - let package = PackageName::normalize(&requirement.name); // Identify any locally-available distributions that satisfy the requirement. if let Some(distribution) = local_index @@ -282,10 +332,38 @@ fn split_uncached_requirements( distribution.name(), distribution.version() ); - Either::Left(distribution.clone()) + local.push(distribution.clone()); } else { debug!("Identified uncached requirement: {}", requirement); - Either::Right(requirement.clone()) + remote.push(requirement.clone()); } + } + + // Remove any unnecessary packages. + for (package, dist_info) in site_packages { + debug!("Unnecessary package: {} ({})", package, dist_info.version()); + extraneous.push(dist_info); + } + + Ok(PartitionedRequirements { + local, + remote, + extraneous, }) + } +} + +#[derive(Debug)] +enum Modification { + /// The package was added to the environment. + Add, + /// The package was removed from the environment. + Remove, +} + +#[derive(Debug)] +struct PackageModification<'a> { + name: &'a PackageName, + version: &'a pep440_rs::Version, + modification: Modification, } diff --git a/crates/puffin-cli/src/commands/pip_uninstall.rs b/crates/puffin-cli/src/commands/pip_uninstall.rs index 11432c590..801779c47 100644 --- a/crates/puffin-cli/src/commands/pip_uninstall.rs +++ b/crates/puffin-cli/src/commands/pip_uninstall.rs @@ -53,11 +53,11 @@ pub(crate) async fn pip_uninstall( }; // Map to the local distributions. - let dist_infos = packages + let distributions = packages .iter() .filter_map(|package| { - if let Some(dist_info) = site_packages.get(package) { - Some(dist_info) + if let Some(distribution) = site_packages.get(package) { + Some(distribution) } else { let _ = writeln!( printer, @@ -71,7 +71,7 @@ pub(crate) async fn pip_uninstall( }) .collect::>(); - if dist_infos.is_empty() { + if distributions.is_empty() { writeln!( printer, "{}{} No packages to uninstall.", @@ -82,11 +82,11 @@ pub(crate) async fn pip_uninstall( } // Uninstall each package. - for dist_info in &dist_infos { - let summary = puffin_installer::uninstall(dist_info).await?; + for distribution in &distributions { + let summary = puffin_installer::uninstall(distribution).await?; debug!( "Uninstalled {} ({} file{}, {} director{})", - dist_info.name(), + distribution.name(), summary.file_count, if summary.file_count == 1 { "" } else { "s" }, summary.dir_count, @@ -101,8 +101,8 @@ pub(crate) async fn pip_uninstall( "Uninstalled {} in {}", format!( "{} package{}", - dist_infos.len(), - if dist_infos.len() == 1 { "" } else { "s" } + distributions.len(), + if distributions.len() == 1 { "" } else { "s" } ) .bold(), elapsed(start.elapsed()) @@ -110,13 +110,13 @@ pub(crate) async fn pip_uninstall( .dimmed() )?; - for dist_info in dist_infos { + for distribution in distributions { writeln!( printer, " {} {}{}", "-".red(), - dist_info.name().as_ref().white().bold(), - format!("@{}", dist_info.version()).dimmed() + distribution.name().as_ref().white().bold(), + format!("@{}", distribution.version()).dimmed() )?; } diff --git a/crates/puffin-cli/src/main.rs b/crates/puffin-cli/src/main.rs index cd9639249..49539e587 100644 --- a/crates/puffin-cli/src/main.rs +++ b/crates/puffin-cli/src/main.rs @@ -66,10 +66,6 @@ struct PipCompileArgs { struct PipSyncArgs { /// Path to the `requirements.txt` file to install. src: PathBuf, - - /// Ignore any installed packages, forcing a re-installation. - #[arg(long)] - ignore_installed: bool, } #[derive(Args)] @@ -145,11 +141,6 @@ async fn main() -> ExitCode { dirs.as_ref() .map(ProjectDirs::cache_dir) .filter(|_| !cli.no_cache), - if args.ignore_installed { - commands::PipSyncFlags::IGNORE_INSTALLED - } else { - commands::PipSyncFlags::empty() - }, printer, ) .await diff --git a/crates/puffin-installer/src/uninstall.rs b/crates/puffin-installer/src/uninstall.rs index ae9d92c12..ac7c5bebf 100644 --- a/crates/puffin-installer/src/uninstall.rs +++ b/crates/puffin-installer/src/uninstall.rs @@ -1,11 +1,11 @@ use anyhow::Result; -use puffin_interpreter::DistInfo; +use puffin_interpreter::Distribution; /// Uninstall a package from the specified Python environment. -pub async fn uninstall(dist_info: &DistInfo) -> Result { +pub async fn uninstall(distribution: &Distribution) -> Result { let uninstall = tokio::task::spawn_blocking({ - let path = dist_info.path().to_owned(); + let path = distribution.path().to_owned(); move || install_wheel_rs::uninstall_wheel(&path) }) .await??; diff --git a/crates/puffin-interpreter/src/lib.rs b/crates/puffin-interpreter/src/lib.rs index e021863c0..dcba4af27 100644 --- a/crates/puffin-interpreter/src/lib.rs +++ b/crates/puffin-interpreter/src/lib.rs @@ -7,7 +7,7 @@ use pep508_rs::MarkerEnvironment; use platform_host::Platform; use crate::python_platform::PythonPlatform; -pub use crate::site_packages::{DistInfo, SitePackages}; +pub use crate::site_packages::{Distribution, SitePackages}; mod markers; mod python_platform; diff --git a/crates/puffin-interpreter/src/site_packages.rs b/crates/puffin-interpreter/src/site_packages.rs index 4c3819699..da1ad6db5 100644 --- a/crates/puffin-interpreter/src/site_packages.rs +++ b/crates/puffin-interpreter/src/site_packages.rs @@ -11,7 +11,7 @@ use puffin_package::package_name::PackageName; use crate::PythonExecutable; #[derive(Debug, Default)] -pub struct SitePackages(BTreeMap); +pub struct SitePackages(BTreeMap); impl SitePackages { /// Build an index of installed packages from the given Python executable. @@ -21,7 +21,7 @@ impl SitePackages { let mut dir = fs::read_dir(python.site_packages()).await?; while let Some(entry) = dir.next_entry().await? { if entry.file_type().await?.is_dir() { - if let Some(dist_info) = DistInfo::try_from_path(&entry.path())? { + if let Some(dist_info) = Distribution::try_from_path(&entry.path())? { index.insert(dist_info.name().clone(), dist_info); } } @@ -31,24 +31,38 @@ impl SitePackages { } /// Returns an iterator over the installed packages. - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> impl Iterator { self.0.iter() } /// Returns the version of the given package, if it is installed. - pub fn get(&self, name: &PackageName) -> Option<&DistInfo> { + pub fn get(&self, name: &PackageName) -> Option<&Distribution> { self.0.get(name) } + + /// Remove the given package from the index, returning its version if it was installed. + pub fn remove(&mut self, name: &PackageName) -> Option { + self.0.remove(name) + } } -#[derive(Debug)] -pub struct DistInfo { +impl IntoIterator for SitePackages { + type Item = (PackageName, Distribution); + type IntoIter = std::collections::btree_map::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +#[derive(Debug, Clone)] +pub struct Distribution { name: PackageName, version: Version, path: PathBuf, } -impl DistInfo { +impl Distribution { /// Try to parse a (potential) `dist-info` directory into a package name and version. /// /// See: @@ -68,7 +82,7 @@ impl DistInfo { let version = Version::from_str(version).map_err(|err| anyhow!(err))?; let path = path.to_path_buf(); - return Ok(Some(DistInfo { + return Ok(Some(Distribution { name, version, path, diff --git a/scripts/benchmarks/sync.sh b/scripts/benchmarks/sync.sh index 7688330ac..a7716bedb 100755 --- a/scripts/benchmarks/sync.sh +++ b/scripts/benchmarks/sync.sh @@ -17,18 +17,18 @@ TARGET=${1} ### hyperfine --runs 20 --warmup 3 \ --prepare "virtualenv --clear .venv" \ - "./target/release/puffin pip-sync ${TARGET} --ignore-installed --no-cache" \ + "./target/release/puffin pip-sync ${TARGET} --no-cache" \ --prepare "rm -rf /tmp/site-packages" \ - "pip install -r ${TARGET} --target /tmp/site-packages --ignore-installed --no-cache-dir --no-deps" + "pip install -r ${TARGET} --target /tmp/site-packages --no-cache-dir --no-deps" ### # Installation with a warm cache, similar to blowing away and re-creating a virtual environment. ### hyperfine --runs 20 --warmup 3 \ --prepare "virtualenv --clear .venv" \ - "./target/release/puffin pip-sync ${TARGET} --ignore-installed" \ + "./target/release/puffin pip-sync ${TARGET}" \ --prepare "rm -rf /tmp/site-packages" \ - "pip install -r ${TARGET} --target /tmp/site-packages --ignore-installed --no-deps" + "pip install -r ${TARGET} --target /tmp/site-packages --no-deps" ### # Installation with all dependencies already installed (no-op).