Add support for parameterized link modes (#164)

Allows the user to select between clone, hardlink, and copy semantics
for installs. (The pnpm documentation has a decent description of what
these mean: https://pnpm.io/npmrc#package-import-method.)

Closes #159.
This commit is contained in:
Charlie Marsh 2023-10-22 00:35:50 -04:00 committed by GitHub
parent 9bcc7fe77a
commit 49a27ff33c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 363 additions and 88 deletions

View file

@ -17,14 +17,12 @@ pub use wheel::{
};
mod install_location;
pub mod linker;
#[cfg(feature = "python_bindings")]
mod python_bindings;
mod record;
#[cfg(any(target_os = "macos", target_os = "ios"))]
mod reflink;
mod script;
mod uninstall;
pub mod unpacked;
mod wheel;
#[derive(Error, Debug)]

View file

@ -27,6 +27,7 @@ use crate::{read_record_file, Error, Script};
pub fn install_wheel(
location: &InstallLocation<impl AsRef<Path>>,
wheel: impl AsRef<Path>,
link_mode: LinkMode,
) -> Result<(), Error> {
let base_location = location.venv_base();
@ -65,7 +66,7 @@ pub fn install_wheel(
// > 1.d Else unpack archive into platlib (site-packages).
// We always install in the same virtualenv site packages
debug!(name, "Extracting file");
let num_unpacked = unpack_wheel_files(&site_packages, &wheel)?;
let num_unpacked = link_mode.link_wheel_files(&site_packages, &wheel)?;
debug!(name, "Extracted {num_unpacked} files");
// Read the RECORD file.
@ -243,14 +244,51 @@ fn parse_scripts(
Ok((console_scripts, gui_scripts))
}
/// Extract all files from the wheel into the site packages.
#[cfg(any(target_os = "macos", target_os = "ios"))]
fn unpack_wheel_files(
/// The strategy used to place the files of an unpacked wheel into site packages.
///
/// Each variant is dispatched to its own routine by [`LinkMode::link_wheel_files`]. The
/// [`Default`] implementation selects `Clone` when compiled for macOS or iOS, and `Hardlink`
/// for every other target.
// NOTE(review): with the `clap` feature enabled, the variant doc comments below are presumably
// surfaced as CLI help text by `clap::ValueEnum` -- confirm before rewording them.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
pub enum LinkMode {
/// Clone (i.e., copy-on-write) packages from the wheel into the site packages.
Clone,
/// Copy packages from the wheel into the site packages.
Copy,
/// Hard link packages from the wheel into the site packages.
Hardlink,
}
impl Default for LinkMode {
    /// Pick the link mode used when the caller does not request one explicitly:
    /// [`LinkMode::Clone`] when compiled for macOS or iOS, [`LinkMode::Hardlink`] for every
    /// other target.
    fn default() -> Self {
        // `cfg!` is evaluated at compile time, so the result is fixed per target.
        let is_apple_target = cfg!(any(target_os = "macos", target_os = "ios"));
        if !is_apple_target {
            return Self::Hardlink;
        }
        Self::Clone
    }
}
impl LinkMode {
    /// Install the contents of an unpacked wheel into site packages using this link mode.
    ///
    /// Delegates to the routine that matches the variant and returns whatever that routine
    /// returns: on success, a `usize` file count; otherwise the routine's error.
    pub fn link_wheel_files(
        self,
        site_packages: impl AsRef<Path>,
        wheel: impl AsRef<Path>,
    ) -> Result<usize, Error> {
        // Borrow both paths once; `&Path` satisfies the helpers' `impl AsRef<Path>` bound.
        let (site_packages, wheel) = (site_packages.as_ref(), wheel.as_ref());
        match self {
            Self::Hardlink => hardlink_wheel_files(site_packages, wheel),
            Self::Copy => copy_wheel_files(site_packages, wheel),
            Self::Clone => clone_wheel_files(site_packages, wheel),
        }
    }
}
/// Extract a wheel by cloning all of its files into site packages. The files will be cloned
/// via copy-on-write, which is similar to a hard link, but allows the files to be modified
/// independently (that is, the file is copied upon modification).
///
/// This method uses `clonefile` on macOS, and `reflink` on Linux.
fn clone_wheel_files(
site_packages: impl AsRef<Path>,
wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
use crate::reflink::reflink;
let mut count = 0usize;
// On macOS, directories can be recursively copied with a single `clonefile` call.
@ -264,16 +302,12 @@ fn unpack_wheel_files(
.join(from.strip_prefix(&wheel).unwrap());
// Delete the destination if it already exists.
if let Ok(metadata) = to.metadata() {
if metadata.is_dir() {
fs::remove_dir_all(&to)?;
} else if metadata.is_file() {
fs::remove_file(&to)?;
}
}
fs::remove_dir_all(&to)
.or_else(|_| fs::remove_file(&to))
.ok();
// Copy the file.
reflink(&from, &to)?;
reflink_copy::reflink(&from, &to)?;
count += 1;
}
@ -281,9 +315,8 @@ fn unpack_wheel_files(
Ok(count)
}
/// Extract all files from the wheel into the site packages
#[cfg(not(any(target_os = "macos", target_os = "ios")))]
fn unpack_wheel_files(
/// Extract a wheel by copying all of its files into site packages.
fn copy_wheel_files(
site_packages: impl AsRef<Path>,
wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
@ -300,7 +333,8 @@ fn unpack_wheel_files(
continue;
}
reflink_copy::reflink_or_copy(entry.path(), &out_path)?;
// Copy the file.
fs::copy(entry.path(), &out_path)?;
#[cfg(unix)]
{
@ -320,3 +354,30 @@ fn unpack_wheel_files(
Ok(count)
}
/// Extract a wheel by hard-linking all of its files into site packages.
///
/// Walks the unpacked `wheel` directory, recreating every directory beneath `site_packages` and
/// hard-linking every other entry to the corresponding destination path. A destination file that
/// already exists is replaced by the new link.
///
/// Returns the number of entries that were linked (directories are not counted).
///
/// # Errors
///
/// Returns an error if walking `wheel` fails, if a directory cannot be created, or if a file
/// cannot be linked (hard links generally cannot cross filesystem boundaries).
fn hardlink_wheel_files(
    site_packages: impl AsRef<Path>,
    wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
    let mut count = 0usize;

    // Walk over the directory.
    for entry in walkdir::WalkDir::new(&wheel) {
        let entry = entry?;
        let relative = entry
            .path()
            .strip_prefix(&wheel)
            .expect("walkdir only yields paths beneath the root it was given");
        let out_path = site_packages.as_ref().join(relative);

        if entry.file_type().is_dir() {
            fs::create_dir_all(&out_path)?;
            continue;
        }

        // Hard link the file. Unlike `fs::copy`, `fs::hard_link` refuses to overwrite an existing
        // destination, so replace a stale file left behind by an earlier install.
        match fs::hard_link(entry.path(), &out_path) {
            Ok(()) => {}
            Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
                fs::remove_file(&out_path)?;
                fs::hard_link(entry.path(), &out_path)?;
            }
            Err(err) => return Err(err.into()),
        }

        count += 1;
    }

    Ok(count)
}

View file

@ -1,39 +0,0 @@
//! Reflink a file on macOS via `clonefile`.
use std::{
ffi::CString,
io,
os::{
raw::{c_char, c_int},
unix::ffi::OsStrExt,
},
path::Path,
};
/// Convert a filesystem path into a NUL-terminated [`CString`] built from the path's raw bytes.
///
/// # Errors
///
/// Returns an [`io::Error`] if the path contains an interior NUL byte.
fn cstr(path: &Path) -> io::Result<CString> {
    let raw_bytes = path.as_os_str().as_bytes();
    CString::new(raw_bytes).map_err(io::Error::from)
}
// Flag values for `clonefile`; see the header linked inside the `extern` block below.
// const CLONE_NOFOLLOW: c_int = 0x0001;
// The only flag that `reflink` passes to `clonefile`.
// NOTE(review): going by the name, this presumably skips copying ownership information to the
// clone -- confirm against the linked `clonefile.h`.
const CLONE_NOOWNERCOPY: c_int = 0x0002;
// Foreign declaration of `clonefile`. `reflink` treats a return value of `-1` as failure and
// then reads the last OS error.
extern "C" {
// http://www.manpagez.com/man/2/clonefileat/
// https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/sys/clonefile.h
// TODO We need weak linkage here (OSX > 10.12, iOS > 10.0), otherwise compilation will fail on older versions
fn clonefile(src: *const c_char, dest: *const c_char, flags: c_int) -> c_int;
}
/// Reflink `from` to `to` by calling `clonefile` with the `CLONE_NOOWNERCOPY` flag.
///
/// # Errors
///
/// Returns an error if either path cannot be converted into a C string, or the last OS error
/// if `clonefile` reports failure by returning `-1`.
pub(crate) fn reflink(from: &Path, to: &Path) -> io::Result<()> {
    let (source, target) = (cstr(from)?, cstr(to)?);

    // SAFETY: both pointers come from `CString`s that stay alive for the whole call, so they
    // point to valid NUL-terminated buffers.
    #[allow(unsafe_code)]
    let status = unsafe { clonefile(source.as_ptr(), target.as_ptr(), CLONE_NOOWNERCOPY) };

    match status {
        -1 => Err(io::Error::last_os_error()),
        _ => Ok(()),
    }
}

View file

@ -3,6 +3,7 @@ use std::path::Path;
use anyhow::{Context, Result};
use colored::Colorize;
use install_wheel_rs::linker::LinkMode;
use itertools::Itertools;
use tracing::debug;
@ -27,6 +28,7 @@ use crate::requirements::RequirementsSource;
/// Install a set of locked requirements into the current Python environment.
pub(crate) async fn pip_sync(
sources: &[RequirementsSource],
link_mode: LinkMode,
cache: Option<&Path>,
mut printer: Printer,
) -> Result<ExitStatus> {
@ -42,12 +44,13 @@ pub(crate) async fn pip_sync(
return Ok(ExitStatus::Success);
}
sync_requirements(&requirements, cache, printer).await
sync_requirements(&requirements, link_mode, cache, printer).await
}
/// Install a set of locked requirements into the current Python environment.
pub(crate) async fn sync_requirements(
requirements: &[Requirement],
link_mode: LinkMode,
cache: Option<&Path>,
mut printer: Printer,
) -> Result<ExitStatus> {
@ -211,6 +214,7 @@ pub(crate) async fn sync_requirements(
if !wheels.is_empty() {
let start = std::time::Instant::now();
puffin_installer::Installer::new(&python)
.with_link_mode(link_mode)
.with_reporter(InstallReporter::from(printer).with_length(wheels.len() as u64))
.install(&wheels)?;

View file

@ -81,6 +81,10 @@ struct PipSyncArgs {
/// Include all packages listed in the given `requirements.txt` files.
#[clap(required(true))]
src_file: Vec<PathBuf>,
/// The method to use when installing packages from the global cache.
#[clap(long, value_enum)]
link_mode: Option<install_wheel_rs::linker::LinkMode>,
}
#[derive(Args)]
@ -174,7 +178,13 @@ async fn main() -> ExitCode {
.into_iter()
.map(RequirementsSource::from)
.collect::<Vec<_>>();
commands::pip_sync(&sources, cache_dir, printer).await
commands::pip_sync(
&sources,
args.link_mode.unwrap_or_default(),
cache_dir,
printer,
)
.await
}
Commands::PipUninstall(args) => {
let sources = args

View file

@ -47,7 +47,8 @@ fn missing_venv() -> Result<()> {
Ok(())
}
/// Install a package into a virtual environment.
/// Install a package into a virtual environment using the default link semantics. (On macOS,
/// this uses `clone` semantics.)
#[test]
fn install() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
@ -92,6 +93,100 @@ fn install() -> Result<()> {
Ok(())
}
/// Install a package into a virtual environment using copy semantics.
///
/// Creates a fresh virtual environment, runs `pip-sync --link-mode copy` against a requirements
/// file that pins a single package, snapshots the command output, and finally checks that the
/// environment's interpreter can import the package. The test does not inspect how the files
/// were materialized on disk.
#[test]
fn install_copy() -> Result<()> {
// Keep the project directory and the cache in separate temporary directories.
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
// Create the virtual environment that the package will be installed into.
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
// Pin the single package to install.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("MarkupSafe==2.1.3")?;
// Replace millisecond timings with a placeholder so the snapshot is stable across runs.
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--link-mode")
.arg("copy")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
// Confirm the package is importable from the environment's interpreter (`bin/python`).
Command::new(venv.join("bin").join("python"))
.arg("-c")
.arg("import markupsafe")
.current_dir(&temp_dir)
.assert()
.success();
Ok(())
}
/// Install a package into a virtual environment using hardlink semantics.
///
/// Creates a fresh virtual environment, runs `pip-sync --link-mode hardlink` against a
/// requirements file that pins a single package, snapshots the command output, and finally
/// checks that the environment's interpreter can import the package. The test does not inspect
/// how the files were materialized on disk.
#[test]
fn install_hardlink() -> Result<()> {
// Keep the project directory and the cache in separate temporary directories.
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
// Create the virtual environment that the package will be installed into.
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
// Pin the single package to install.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("MarkupSafe==2.1.3")?;
// Replace millisecond timings with a placeholder so the snapshot is stable across runs.
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--link-mode")
.arg("hardlink")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
// Confirm the package is importable from the environment's interpreter (`bin/python`).
Command::new(venv.join("bin").join("python"))
.arg("-c")
.arg("import markupsafe")
.current_dir(&temp_dir)
.assert()
.success();
Ok(())
}
/// Install multiple packages into a virtual environment.
#[test]
fn install_many() -> Result<()> {
@ -385,3 +480,63 @@ fn install_sequential() -> Result<()> {
Ok(())
}
/// Install a package into a virtual environment, then upgrade it by syncing the same virtual
/// environment against a requirements file that pins a newer version of that package.
///
/// The second `pip-sync` run is snapshotted, and the test finishes by checking that the package
/// can still be imported by the environment's interpreter.
#[test]
fn upgrade() -> Result<()> {
// Keep the project directory and the cache in separate temporary directories.
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
// Create the virtual environment that the package will be installed into.
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
// Pin the initial (older) version.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("tomli==2.0.0")?;
// First sync: only required to succeed; its output is not snapshotted.
Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir)
.assert()
.success();
// Re-pin the same package to a newer version.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("tomli==2.0.1")?;
// Replace millisecond timings with a placeholder so the snapshot is stable across runs.
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
// Confirm the package is importable from the environment's interpreter (`bin/python`).
Command::new(venv.join("bin").join("python"))
.arg("-c")
.arg("import tomli")
.current_dir(&temp_dir)
.assert()
.success();
Ok(())
}

View file

@ -5,7 +5,10 @@ info:
args:
- pip-sync
- requirements.txt
- "--no-cache"
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpkNx6zh
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpvozar2/.venv
---
success: true
exit_code: 0

View file

@ -0,0 +1,25 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--link-mode"
- copy
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpqNznsn
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp7lgVHK/.venv
---
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ markupsafe@2.1.3

View file

@ -0,0 +1,25 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--link-mode"
- hardlink
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpGHO8A6
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpBPXcHk/.venv
---
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ markupsafe@2.1.3

View file

@ -0,0 +1,25 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpVgm17b
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpDIXrmg/.venv
---
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
- tomli@2.0.0
+ tomli@2.0.1

View file

@ -9,6 +9,7 @@ use crate::CachedDistribution;
pub struct Installer<'a> {
/// The Python executable whose version is consulted when installing wheels.
python: &'a PythonExecutable,
/// The link mode handed to `install_wheel` for every wheel that is installed.
link_mode: install_wheel_rs::linker::LinkMode,
/// Optional reporter that is notified of install progress for each wheel.
reporter: Option<Box<dyn Reporter>>,
}
@ -17,11 +18,18 @@ impl<'a> Installer<'a> {
pub fn new(python: &'a PythonExecutable) -> Self {
    // Start from the default link mode; callers can override it via `with_link_mode`.
    let link_mode = install_wheel_rs::linker::LinkMode::default();
    Self {
        python,
        link_mode,
        // No progress reporting until a reporter is attached.
        reporter: None,
    }
}
/// Set the [`Reporter`] to use for this installer..
/// Set the [`LinkMode`](install_wheel_rs::linker::LinkMode) to use for this installer.
///
/// Consumes the installer and returns it with only the link mode replaced.
#[must_use]
pub fn with_link_mode(self, link_mode: install_wheel_rs::linker::LinkMode) -> Self {
    let mut installer = self;
    installer.link_mode = link_mode;
    installer
}
/// Set the [`Reporter`] to use for this installer.
#[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
Self {
@ -39,7 +47,7 @@ impl<'a> Installer<'a> {
self.python.simple_version(),
);
install_wheel_rs::unpacked::install_wheel(&location, wheel.path())?;
install_wheel_rs::linker::install_wheel(&location, wheel.path(), self.link_mode)?;
if let Some(reporter) = self.reporter.as_ref() {
reporter.on_install_progress(wheel.name(), wheel.version());