Remove packages when syncing (#135)

`pip-sync` will now uninstall any packages that aren't necessary.

Closes https://github.com/astral-sh/puffin/issues/128.
This commit is contained in:
Charlie Marsh 2023-10-19 00:14:20 -04:00 committed by GitHub
parent 41ece4184b
commit bd01fb490e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 215 additions and 132 deletions

View file

@ -5,7 +5,7 @@ pub(crate) use add::add;
pub(crate) use clean::clean; pub(crate) use clean::clean;
pub(crate) use freeze::freeze; pub(crate) use freeze::freeze;
pub(crate) use pip_compile::pip_compile; pub(crate) use pip_compile::pip_compile;
pub(crate) use pip_sync::{pip_sync, PipSyncFlags}; pub(crate) use pip_sync::pip_sync;
pub(crate) use pip_uninstall::pip_uninstall; pub(crate) use pip_uninstall::pip_uninstall;
pub(crate) use remove::remove; pub(crate) use remove::remove;
pub(crate) use venv::venv; pub(crate) use venv::venv;

View file

@ -2,8 +2,8 @@ use std::fmt::Write;
use std::path::Path; use std::path::Path;
use anyhow::{bail, Context, Result}; use anyhow::{bail, Context, Result};
use bitflags::bitflags;
use itertools::{Either, Itertools}; use itertools::Itertools;
use owo_colors::OwoColorize; use owo_colors::OwoColorize;
use pep508_rs::Requirement; use pep508_rs::Requirement;
use tracing::debug; use tracing::debug;
@ -12,7 +12,7 @@ use platform_host::Platform;
use platform_tags::Tags; use platform_tags::Tags;
use puffin_client::PypiClientBuilder; use puffin_client::PypiClientBuilder;
use puffin_installer::{LocalDistribution, LocalIndex, RemoteDistribution}; use puffin_installer::{LocalDistribution, LocalIndex, RemoteDistribution};
use puffin_interpreter::{PythonExecutable, SitePackages}; use puffin_interpreter::{Distribution, PythonExecutable, SitePackages};
use puffin_package::package_name::PackageName; use puffin_package::package_name::PackageName;
use puffin_package::requirements_txt::RequirementsTxt; use puffin_package::requirements_txt::RequirementsTxt;
use puffin_resolver::Resolution; use puffin_resolver::Resolution;
@ -23,19 +23,10 @@ use crate::commands::reporters::{
use crate::commands::{elapsed, ExitStatus}; use crate::commands::{elapsed, ExitStatus};
use crate::printer::Printer; use crate::printer::Printer;
bitflags! {
#[derive(Debug, Copy, Clone, Default)]
pub struct PipSyncFlags: u8 {
/// Ignore any installed packages, forcing a re-installation.
const IGNORE_INSTALLED = 1 << 0;
}
}
/// Install a set of locked requirements into the current Python environment. /// Install a set of locked requirements into the current Python environment.
pub(crate) async fn pip_sync( pub(crate) async fn pip_sync(
src: &Path, src: &Path,
cache: Option<&Path>, cache: Option<&Path>,
flags: PipSyncFlags,
mut printer: Printer, mut printer: Printer,
) -> Result<ExitStatus> { ) -> Result<ExitStatus> {
// Read the `requirements.txt` from disk. // Read the `requirements.txt` from disk.
@ -53,14 +44,13 @@ pub(crate) async fn pip_sync(
return Ok(ExitStatus::Success); return Ok(ExitStatus::Success);
} }
sync_requirements(&requirements, cache, flags, printer).await sync_requirements(&requirements, cache, printer).await
} }
/// Install a set of locked requirements into the current Python environment. /// Install a set of locked requirements into the current Python environment.
pub(crate) async fn sync_requirements( pub(crate) async fn sync_requirements(
requirements: &[Requirement], requirements: &[Requirement],
cache: Option<&Path>, cache: Option<&Path>,
flags: PipSyncFlags,
mut printer: Printer, mut printer: Printer,
) -> Result<ExitStatus> { ) -> Result<ExitStatus> {
// Audit the requirements. // Audit the requirements.
@ -74,15 +64,16 @@ pub(crate) async fn sync_requirements(
python.executable().display() python.executable().display()
); );
// Determine the current environment markers. // Partition into those that should be linked from the cache (`local`), those that need to be
let tags = Tags::from_env(python.platform(), python.simple_version())?; // downloaded (`remote`), and those that should be removed (`extraneous`).
let PartitionedRequirements {
// Filter out any already-installed or already-cached packages. local,
let (cached, uncached) = remote,
find_uncached_requirements(requirements, cache, flags, &python).await?; extraneous,
} = PartitionedRequirements::try_from_requirements(requirements, cache, &python).await?;
// Nothing to do. // Nothing to do.
if uncached.is_empty() && cached.is_empty() { if remote.is_empty() && local.is_empty() && extraneous.is_empty() {
let s = if requirements.len() == 1 { "" } else { "s" }; let s = if requirements.len() == 1 { "" } else { "s" };
writeln!( writeln!(
printer, printer,
@ -98,17 +89,19 @@ pub(crate) async fn sync_requirements(
return Ok(ExitStatus::Success); return Ok(ExitStatus::Success);
} }
// Determine the current environment markers.
let tags = Tags::from_env(python.platform(), python.simple_version())?;
let client = PypiClientBuilder::default().cache(cache).build(); let client = PypiClientBuilder::default().cache(cache).build();
// Resolve the dependencies. // Resolve the dependencies.
let resolution = if uncached.is_empty() { let resolution = if remote.is_empty() {
Resolution::default() Resolution::default()
} else { } else {
let start = std::time::Instant::now(); let start = std::time::Instant::now();
let wheel_finder = puffin_resolver::WheelFinder::new(&tags, &client) let wheel_finder = puffin_resolver::WheelFinder::new(&tags, &client)
.with_reporter(WheelFinderReporter::from(printer).with_length(uncached.len() as u64)); .with_reporter(WheelFinderReporter::from(printer).with_length(remote.len() as u64));
let resolution = wheel_finder.resolve(&uncached).await?; let resolution = wheel_finder.resolve(&remote).await?;
let s = if resolution.len() == 1 { "" } else { "s" }; let s = if resolution.len() == 1 { "" } else { "s" };
writeln!( writeln!(
@ -125,13 +118,12 @@ pub(crate) async fn sync_requirements(
resolution resolution
}; };
// Download any missing distributions.
let staging = tempfile::tempdir()?;
let uncached = resolution let uncached = resolution
.into_files() .into_files()
.map(RemoteDistribution::from_file) .map(RemoteDistribution::from_file)
.collect::<Result<Vec<_>>>()?; .collect::<Result<Vec<_>>>()?;
let staging = tempfile::tempdir()?;
// Download any missing distributions.
let downloads = if uncached.is_empty() { let downloads = if uncached.is_empty() {
vec![] vec![]
} else { } else {
@ -188,89 +180,147 @@ pub(crate) async fn sync_requirements(
unzips unzips
}; };
// Install the resolved distributions. // Remove any unnecessary packages.
let start = std::time::Instant::now(); if !extraneous.is_empty() {
let wheels = unzips.into_iter().chain(cached).collect::<Vec<_>>(); let start = std::time::Instant::now();
puffin_installer::Installer::new(&python)
.with_reporter(InstallReporter::from(printer).with_length(wheels.len() as u64))
.install(&wheels)?;
let s = if wheels.len() == 1 { "" } else { "s" }; for dist_info in &extraneous {
writeln!( let summary = puffin_installer::uninstall(dist_info).await?;
printer, debug!(
"{}", "Uninstalled {} ({} file{}, {} director{})",
format!( dist_info.name(),
"Installed {} in {}", summary.file_count,
format!("{} package{}", wheels.len(), s).bold(), if summary.file_count == 1 { "" } else { "s" },
elapsed(start.elapsed()) summary.dir_count,
) if summary.dir_count == 1 { "y" } else { "ies" },
.dimmed() );
)?; }
for wheel in wheels { let s = if extraneous.len() == 1 { "" } else { "s" };
writeln!( writeln!(
printer, printer,
" {} {}{}", "{}",
"+".green(), format!(
wheel.name().as_ref().white().bold(), "Uninstalled {} in {}",
format!("@{}", wheel.version()).dimmed() format!("{} package{}", extraneous.len(), s).bold(),
elapsed(start.elapsed())
)
.dimmed()
)?; )?;
} }
// Install the resolved distributions.
let wheels = unzips.into_iter().chain(local).collect::<Vec<_>>();
if !wheels.is_empty() {
let start = std::time::Instant::now();
puffin_installer::Installer::new(&python)
.with_reporter(InstallReporter::from(printer).with_length(wheels.len() as u64))
.install(&wheels)?;
let s = if wheels.len() == 1 { "" } else { "s" };
writeln!(
printer,
"{}",
format!(
"Installed {} in {}",
format!("{} package{}", wheels.len(), s).bold(),
elapsed(start.elapsed())
)
.dimmed()
)?;
}
for dist in extraneous
.iter()
.map(|dist_info| PackageModification {
name: dist_info.name(),
version: dist_info.version(),
modification: Modification::Remove,
})
.chain(wheels.iter().map(|dist_info| PackageModification {
name: dist_info.name(),
version: dist_info.version(),
modification: Modification::Add,
}))
.sorted_unstable_by_key(|modification| modification.name)
{
match dist.modification {
Modification::Add => {
writeln!(
printer,
" {} {}{}",
"+".green(),
dist.name.as_ref().white().bold(),
format!("@{}", dist.version).dimmed()
)?;
}
Modification::Remove => {
writeln!(
printer,
" {} {}{}",
"-".red(),
dist.name.as_ref().white().bold(),
format!("@{}", dist.version).dimmed()
)?;
}
}
}
Ok(ExitStatus::Success) Ok(ExitStatus::Success)
} }
async fn find_uncached_requirements( #[derive(Debug, Default)]
requirements: &[Requirement], struct PartitionedRequirements {
cache: Option<&Path>, /// The distributions that are not already installed in the current environment, but are
flags: PipSyncFlags, /// available in the local cache.
python: &PythonExecutable, local: Vec<LocalDistribution>,
) -> Result<(Vec<LocalDistribution>, Vec<Requirement>)> {
// Index all the already-installed packages in site-packages.
let site_packages = if flags.intersects(PipSyncFlags::IGNORE_INSTALLED) {
SitePackages::default()
} else {
SitePackages::from_executable(python).await?
};
// Index all the already-downloaded wheels in the cache. /// The distributions that are not already installed in the current environment, and are
let local_index = if let Some(cache) = cache { /// not available in the local cache.
LocalIndex::from_directory(cache).await? remote: Vec<Requirement>,
} else {
LocalIndex::default()
};
Ok(split_uncached_requirements( /// The distributions that are already installed in the current environment, and are
requirements, /// _not_ necessary to satisfy the requirements.
&site_packages, extraneous: Vec<Distribution>,
&local_index,
))
} }
fn split_uncached_requirements( impl PartitionedRequirements {
requirements: &[Requirement], /// Partition a set of requirements into those that should be linked from the cache, those that
site_packages: &SitePackages, /// need to be downloaded, and those that should be removed.
local_index: &LocalIndex, pub(crate) async fn try_from_requirements(
) -> (Vec<LocalDistribution>, Vec<Requirement>) { requirements: &[Requirement],
requirements cache: Option<&Path>,
.iter() python: &PythonExecutable,
.filter(|requirement| { ) -> Result<Self> {
// Index all the already-installed packages in site-packages.
let mut site_packages = SitePackages::from_executable(python).await?;
// Index all the already-downloaded wheels in the cache.
let local_index = if let Some(cache) = cache {
LocalIndex::from_directory(cache).await?
} else {
LocalIndex::default()
};
let mut local = vec![];
let mut remote = vec![];
let mut extraneous = vec![];
for requirement in requirements {
let package = PackageName::normalize(&requirement.name); let package = PackageName::normalize(&requirement.name);
// Filter out already-installed packages. // Filter out already-installed packages.
if let Some(dist_info) = site_packages.get(&package) { if let Some(dist) = site_packages.remove(&package) {
debug!( if requirement.is_satisfied_by(dist.version()) {
"Requirement already satisfied: {} ({})", debug!(
package, "Requirement already satisfied: {} ({})",
dist_info.version() package,
); dist.version()
false );
} else { continue;
true }
extraneous.push(dist);
} }
})
.partition_map(|requirement| {
let package = PackageName::normalize(&requirement.name);
// Identify any locally-available distributions that satisfy the requirement. // Identify any locally-available distributions that satisfy the requirement.
if let Some(distribution) = local_index if let Some(distribution) = local_index
@ -282,10 +332,38 @@ fn split_uncached_requirements(
distribution.name(), distribution.name(),
distribution.version() distribution.version()
); );
Either::Left(distribution.clone()) local.push(distribution.clone());
} else { } else {
debug!("Identified uncached requirement: {}", requirement); debug!("Identified uncached requirement: {}", requirement);
Either::Right(requirement.clone()) remote.push(requirement.clone());
} }
}
// Any installed packages left in the index are not required; mark them as extraneous.
for (package, dist_info) in site_packages {
debug!("Unnecessary package: {} ({})", package, dist_info.version());
extraneous.push(dist_info);
}
Ok(PartitionedRequirements {
local,
remote,
extraneous,
}) })
}
}
#[derive(Debug)]
enum Modification {
/// The package was added to the environment.
Add,
/// The package was removed from the environment.
Remove,
}
#[derive(Debug)]
struct PackageModification<'a> {
name: &'a PackageName,
version: &'a pep440_rs::Version,
modification: Modification,
} }

View file

@ -53,11 +53,11 @@ pub(crate) async fn pip_uninstall(
}; };
// Map to the local distributions. // Map to the local distributions.
let dist_infos = packages let distributions = packages
.iter() .iter()
.filter_map(|package| { .filter_map(|package| {
if let Some(dist_info) = site_packages.get(package) { if let Some(distribution) = site_packages.get(package) {
Some(dist_info) Some(distribution)
} else { } else {
let _ = writeln!( let _ = writeln!(
printer, printer,
@ -71,7 +71,7 @@ pub(crate) async fn pip_uninstall(
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
if dist_infos.is_empty() { if distributions.is_empty() {
writeln!( writeln!(
printer, printer,
"{}{} No packages to uninstall.", "{}{} No packages to uninstall.",
@ -82,11 +82,11 @@ pub(crate) async fn pip_uninstall(
} }
// Uninstall each package. // Uninstall each package.
for dist_info in &dist_infos { for distribution in &distributions {
let summary = puffin_installer::uninstall(dist_info).await?; let summary = puffin_installer::uninstall(distribution).await?;
debug!( debug!(
"Uninstalled {} ({} file{}, {} director{})", "Uninstalled {} ({} file{}, {} director{})",
dist_info.name(), distribution.name(),
summary.file_count, summary.file_count,
if summary.file_count == 1 { "" } else { "s" }, if summary.file_count == 1 { "" } else { "s" },
summary.dir_count, summary.dir_count,
@ -101,8 +101,8 @@ pub(crate) async fn pip_uninstall(
"Uninstalled {} in {}", "Uninstalled {} in {}",
format!( format!(
"{} package{}", "{} package{}",
dist_infos.len(), distributions.len(),
if dist_infos.len() == 1 { "" } else { "s" } if distributions.len() == 1 { "" } else { "s" }
) )
.bold(), .bold(),
elapsed(start.elapsed()) elapsed(start.elapsed())
@ -110,13 +110,13 @@ pub(crate) async fn pip_uninstall(
.dimmed() .dimmed()
)?; )?;
for dist_info in dist_infos { for distribution in distributions {
writeln!( writeln!(
printer, printer,
" {} {}{}", " {} {}{}",
"-".red(), "-".red(),
dist_info.name().as_ref().white().bold(), distribution.name().as_ref().white().bold(),
format!("@{}", dist_info.version()).dimmed() format!("@{}", distribution.version()).dimmed()
)?; )?;
} }

View file

@ -66,10 +66,6 @@ struct PipCompileArgs {
struct PipSyncArgs { struct PipSyncArgs {
/// Path to the `requirements.txt` file to install. /// Path to the `requirements.txt` file to install.
src: PathBuf, src: PathBuf,
/// Ignore any installed packages, forcing a re-installation.
#[arg(long)]
ignore_installed: bool,
} }
#[derive(Args)] #[derive(Args)]
@ -145,11 +141,6 @@ async fn main() -> ExitCode {
dirs.as_ref() dirs.as_ref()
.map(ProjectDirs::cache_dir) .map(ProjectDirs::cache_dir)
.filter(|_| !cli.no_cache), .filter(|_| !cli.no_cache),
if args.ignore_installed {
commands::PipSyncFlags::IGNORE_INSTALLED
} else {
commands::PipSyncFlags::empty()
},
printer, printer,
) )
.await .await

View file

@ -1,11 +1,11 @@
use anyhow::Result; use anyhow::Result;
use puffin_interpreter::DistInfo; use puffin_interpreter::Distribution;
/// Uninstall a package from the specified Python environment. /// Uninstall a package from the specified Python environment.
pub async fn uninstall(dist_info: &DistInfo) -> Result<install_wheel_rs::Uninstall> { pub async fn uninstall(distribution: &Distribution) -> Result<install_wheel_rs::Uninstall> {
let uninstall = tokio::task::spawn_blocking({ let uninstall = tokio::task::spawn_blocking({
let path = dist_info.path().to_owned(); let path = distribution.path().to_owned();
move || install_wheel_rs::uninstall_wheel(&path) move || install_wheel_rs::uninstall_wheel(&path)
}) })
.await??; .await??;

View file

@ -7,7 +7,7 @@ use pep508_rs::MarkerEnvironment;
use platform_host::Platform; use platform_host::Platform;
use crate::python_platform::PythonPlatform; use crate::python_platform::PythonPlatform;
pub use crate::site_packages::{DistInfo, SitePackages}; pub use crate::site_packages::{Distribution, SitePackages};
mod markers; mod markers;
mod python_platform; mod python_platform;

View file

@ -11,7 +11,7 @@ use puffin_package::package_name::PackageName;
use crate::PythonExecutable; use crate::PythonExecutable;
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct SitePackages(BTreeMap<PackageName, DistInfo>); pub struct SitePackages(BTreeMap<PackageName, Distribution>);
impl SitePackages { impl SitePackages {
/// Build an index of installed packages from the given Python executable. /// Build an index of installed packages from the given Python executable.
@ -21,7 +21,7 @@ impl SitePackages {
let mut dir = fs::read_dir(python.site_packages()).await?; let mut dir = fs::read_dir(python.site_packages()).await?;
while let Some(entry) = dir.next_entry().await? { while let Some(entry) = dir.next_entry().await? {
if entry.file_type().await?.is_dir() { if entry.file_type().await?.is_dir() {
if let Some(dist_info) = DistInfo::try_from_path(&entry.path())? { if let Some(dist_info) = Distribution::try_from_path(&entry.path())? {
index.insert(dist_info.name().clone(), dist_info); index.insert(dist_info.name().clone(), dist_info);
} }
} }
@ -31,24 +31,38 @@ impl SitePackages {
} }
/// Returns an iterator over the installed packages. /// Returns an iterator over the installed packages.
pub fn iter(&self) -> impl Iterator<Item = (&PackageName, &DistInfo)> { pub fn iter(&self) -> impl Iterator<Item = (&PackageName, &Distribution)> {
self.0.iter() self.0.iter()
} }
/// Returns the installed distribution for the given package, if any. /// Returns the installed distribution for the given package, if any.
pub fn get(&self, name: &PackageName) -> Option<&DistInfo> { pub fn get(&self, name: &PackageName) -> Option<&Distribution> {
self.0.get(name) self.0.get(name)
} }
/// Remove the given package from the index, returning its distribution if it was installed.
pub fn remove(&mut self, name: &PackageName) -> Option<Distribution> {
self.0.remove(name)
}
} }
#[derive(Debug)] impl IntoIterator for SitePackages {
pub struct DistInfo { type Item = (PackageName, Distribution);
type IntoIter = std::collections::btree_map::IntoIter<PackageName, Distribution>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
#[derive(Debug, Clone)]
pub struct Distribution {
name: PackageName, name: PackageName,
version: Version, version: Version,
path: PathBuf, path: PathBuf,
} }
impl DistInfo { impl Distribution {
/// Try to parse a (potential) `dist-info` directory into a package name and version. /// Try to parse a (potential) `dist-info` directory into a package name and version.
/// ///
/// See: <https://packaging.python.org/en/latest/specifications/recording-installed-packages/#recording-installed-packages> /// See: <https://packaging.python.org/en/latest/specifications/recording-installed-packages/#recording-installed-packages>
@ -68,7 +82,7 @@ impl DistInfo {
let version = Version::from_str(version).map_err(|err| anyhow!(err))?; let version = Version::from_str(version).map_err(|err| anyhow!(err))?;
let path = path.to_path_buf(); let path = path.to_path_buf();
return Ok(Some(DistInfo { return Ok(Some(Distribution {
name, name,
version, version,
path, path,

View file

@ -17,18 +17,18 @@ TARGET=${1}
### ###
hyperfine --runs 20 --warmup 3 \ hyperfine --runs 20 --warmup 3 \
--prepare "virtualenv --clear .venv" \ --prepare "virtualenv --clear .venv" \
"./target/release/puffin pip-sync ${TARGET} --ignore-installed --no-cache" \ "./target/release/puffin pip-sync ${TARGET} --no-cache" \
--prepare "rm -rf /tmp/site-packages" \ --prepare "rm -rf /tmp/site-packages" \
"pip install -r ${TARGET} --target /tmp/site-packages --ignore-installed --no-cache-dir --no-deps" "pip install -r ${TARGET} --target /tmp/site-packages --no-cache-dir --no-deps"
### ###
# Installation with a warm cache, similar to blowing away and re-creating a virtual environment. # Installation with a warm cache, similar to blowing away and re-creating a virtual environment.
### ###
hyperfine --runs 20 --warmup 3 \ hyperfine --runs 20 --warmup 3 \
--prepare "virtualenv --clear .venv" \ --prepare "virtualenv --clear .venv" \
"./target/release/puffin pip-sync ${TARGET} --ignore-installed" \ "./target/release/puffin pip-sync ${TARGET}" \
--prepare "rm -rf /tmp/site-packages" \ --prepare "rm -rf /tmp/site-packages" \
"pip install -r ${TARGET} --target /tmp/site-packages --ignore-installed --no-deps" "pip install -r ${TARGET} --target /tmp/site-packages --no-deps"
### ###
# Installation with all dependencies already installed (no-op). # Installation with all dependencies already installed (no-op).