Add support for parameterized link modes (#164)

Allows the user to select between clone, hardlink, and copy semantics
for installs. (The pnpm documentation has a decent description of what
these mean: https://pnpm.io/npmrc#package-import-method.)

Closes #159.
This commit is contained in:
Charlie Marsh 2023-10-22 00:35:50 -04:00 committed by GitHub
parent 9bcc7fe77a
commit 49a27ff33c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 363 additions and 88 deletions

42
Cargo.lock generated
View file

@ -518,9 +518,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
[[package]]
name = "cpufeatures"
version = "0.2.9"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
checksum = "3fbc60abd742b35f2492f808e1abbb83d45f72db402e14c55057edc9c7b1e9e4"
dependencies = [
"libc",
]
@ -1045,9 +1045,9 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.14.1"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12"
checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156"
[[package]]
name = "heck"
@ -1183,7 +1183,7 @@ dependencies = [
"httpdate",
"itoa",
"pin-project-lite",
"socket2 0.4.9",
"socket2 0.4.10",
"tokio",
"tower-service",
"tracing",
@ -1270,7 +1270,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897"
dependencies = [
"equivalent",
"hashbrown 0.14.1",
"hashbrown 0.14.2",
"serde",
]
@ -2581,9 +2581,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.38.19"
version = "0.38.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
checksum = "67ce50cb2e16c2903e30d1cbccfd8387a74b9d4c938b6a4c5ec6cc7556f7a8a0"
dependencies = [
"bitflags 2.4.1",
"errno",
@ -2800,9 +2800,9 @@ checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
[[package]]
name = "socket2"
version = "0.4.9"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662"
checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
dependencies = [
"libc",
"winapi",
@ -2810,9 +2810,9 @@ dependencies = [
[[package]]
name = "socket2"
version = "0.5.4"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e"
checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
dependencies = [
"libc",
"windows-sys 0.48.0",
@ -2938,9 +2938,9 @@ dependencies = [
[[package]]
name = "target-lexicon"
version = "0.12.11"
version = "0.12.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a"
checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a"
[[package]]
name = "task-local-extensions"
@ -3037,18 +3037,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.49"
version = "1.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.49"
version = "1.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
dependencies = [
"proc-macro2",
"quote",
@ -3130,7 +3130,7 @@ dependencies = [
"mio",
"num_cpus",
"pin-project-lite",
"socket2 0.5.4",
"socket2 0.5.5",
"tokio-macros",
"windows-sys 0.48.0",
]
@ -3249,9 +3249,9 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
[[package]]
name = "tracing"
version = "0.1.39"
version = "0.1.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee2ef2af84856a50c1d430afce2fdded0a4ec7eda868db86409b4543df0797f9"
checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
dependencies = [
"log",
"pin-project-lite",

View file

@ -44,7 +44,7 @@ platform-info = { version = "2.0.2" }
plist = { version = "1.5.0" }
pyproject-toml = { version = "0.7.0" }
rayon = { version = "1.8.0" }
reflink-copy = { version = "0.1.9" }
reflink-copy = { version = "0.1.10" }
regex = { version = "1.9.6" }
reqwest = { version = "0.11.22", features = ["json", "gzip", "brotli", "stream"] }
reqwest-middleware = { version = "0.2.3" }

View file

@ -17,14 +17,12 @@ pub use wheel::{
};
mod install_location;
pub mod linker;
#[cfg(feature = "python_bindings")]
mod python_bindings;
mod record;
#[cfg(any(target_os = "macos", target_os = "ios"))]
mod reflink;
mod script;
mod uninstall;
pub mod unpacked;
mod wheel;
#[derive(Error, Debug)]

View file

@ -27,6 +27,7 @@ use crate::{read_record_file, Error, Script};
pub fn install_wheel(
location: &InstallLocation<impl AsRef<Path>>,
wheel: impl AsRef<Path>,
link_mode: LinkMode,
) -> Result<(), Error> {
let base_location = location.venv_base();
@ -65,7 +66,7 @@ pub fn install_wheel(
// > 1.d Else unpack archive into platlib (site-packages).
// We always install in the same virtualenv site packages
debug!(name, "Extracting file");
let num_unpacked = unpack_wheel_files(&site_packages, &wheel)?;
let num_unpacked = link_mode.link_wheel_files(&site_packages, &wheel)?;
debug!(name, "Extracted {num_unpacked} files");
// Read the RECORD file.
@ -243,14 +244,51 @@ fn parse_scripts(
Ok((console_scripts, gui_scripts))
}
/// Extract all files from the wheel into the site packages.
#[cfg(any(target_os = "macos", target_os = "ios"))]
fn unpack_wheel_files(
/// The strategy used to place a wheel's files into site packages.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
pub enum LinkMode {
    /// Clone (i.e., copy-on-write) packages from the wheel into the site packages.
    Clone,
    /// Copy packages from the wheel into the site packages.
    Copy,
    /// Hard link packages from the wheel into the site packages.
    Hardlink,
}

impl Default for LinkMode {
    /// Pick the platform default: `Clone` when compiled for macOS or iOS, `Hardlink` otherwise.
    fn default() -> Self {
        // `cfg!` is resolved at compile time, so only one arm survives in the final binary.
        if !cfg!(any(target_os = "macos", target_os = "ios")) {
            return Self::Hardlink;
        }
        Self::Clone
    }
}
impl LinkMode {
/// Extract a wheel by linking all of its files into site packages.
///
/// Dispatches on the selected mode: `Clone` calls `clone_wheel_files`, `Copy` calls
/// `copy_wheel_files`, and `Hardlink` calls `hardlink_wheel_files`. The result of the
/// chosen helper is returned unchanged.
pub fn link_wheel_files(
self,
site_packages: impl AsRef<Path>,
wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
use crate::reflink::reflink;
match self {
Self::Clone => clone_wheel_files(site_packages, wheel),
Self::Copy => copy_wheel_files(site_packages, wheel),
Self::Hardlink => hardlink_wheel_files(site_packages, wheel),
}
}
}
/// Extract a wheel by cloning all of its files into site packages. The files will be cloned
/// via copy-on-write, which is similar to a hard link, but allows the files to be modified
/// independently (that is, the file is copied upon modification).
///
/// This method uses `clonefile` on macOS, and `reflink` on Linux.
fn clone_wheel_files(
site_packages: impl AsRef<Path>,
wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
let mut count = 0usize;
// On macOS, directories can be recursively copied with a single `clonefile` call.
@ -264,16 +302,12 @@ fn unpack_wheel_files(
.join(from.strip_prefix(&wheel).unwrap());
// Delete the destination if it already exists.
if let Ok(metadata) = to.metadata() {
if metadata.is_dir() {
fs::remove_dir_all(&to)?;
} else if metadata.is_file() {
fs::remove_file(&to)?;
}
}
fs::remove_dir_all(&to)
.or_else(|_| fs::remove_file(&to))
.ok();
// Copy the file.
reflink(&from, &to)?;
reflink_copy::reflink(&from, &to)?;
count += 1;
}
@ -281,9 +315,8 @@ fn unpack_wheel_files(
Ok(count)
}
/// Extract all files from the wheel into the site packages
#[cfg(not(any(target_os = "macos", target_os = "ios")))]
fn unpack_wheel_files(
/// Extract a wheel by copying all of its files into site packages.
fn copy_wheel_files(
site_packages: impl AsRef<Path>,
wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
@ -300,7 +333,8 @@ fn unpack_wheel_files(
continue;
}
reflink_copy::reflink_or_copy(entry.path(), &out_path)?;
// Copy the file.
fs::copy(entry.path(), &out_path)?;
#[cfg(unix)]
{
@ -320,3 +354,30 @@ fn unpack_wheel_files(
Ok(count)
}
/// Extract a wheel by hard-linking all of its files into site packages.
///
/// Walks the unpacked wheel directory, recreates its directory layout beneath
/// `site_packages`, and hard-links every non-directory entry into place. A destination
/// file that already exists is removed first so that the link can be created.
///
/// Returns the number of entries that were linked.
///
/// # Errors
///
/// Returns an error if the directory walk fails, a directory cannot be created, a stale
/// destination cannot be removed, or the hard link cannot be created.
fn hardlink_wheel_files(
    site_packages: impl AsRef<Path>,
    wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
    let mut count = 0usize;

    // Walk over the directory.
    for entry in walkdir::WalkDir::new(&wheel) {
        let entry = entry?;
        // Every entry yielded by the walk lives under `wheel`, so stripping the prefix
        // cannot fail.
        let relative = entry.path().strip_prefix(&wheel).unwrap();
        let out_path = site_packages.as_ref().join(relative);

        if entry.file_type().is_dir() {
            fs::create_dir_all(&out_path)?;
            continue;
        }

        // `fs::hard_link` fails if the link path already exists, so clear out any stale
        // file first. A missing destination is the common case and is not an error.
        match fs::remove_file(&out_path) {
            Ok(()) => {}
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
            Err(err) => return Err(err.into()),
        }

        // Hard link the file.
        fs::hard_link(entry.path(), &out_path)?;

        count += 1;
    }

    Ok(count)
}

View file

@ -1,39 +0,0 @@
//! Reflink a file on macOS via `clonefile`.
use std::{
ffi::CString,
io,
os::{
raw::{c_char, c_int},
unix::ffi::OsStrExt,
},
path::Path,
};
/// Convert a filesystem path into a NUL-terminated C string.
///
/// Fails with an I/O error if the path contains an interior NUL byte.
fn cstr(path: &Path) -> io::Result<CString> {
    let raw = path.as_os_str().as_bytes();
    CString::new(raw).map_err(io::Error::from)
}
// const CLONE_NOFOLLOW: c_int = 0x0001;
const CLONE_NOOWNERCOPY: c_int = 0x0002;
extern "C" {
// http://www.manpagez.com/man/2/clonefileat/
// https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/sys/clonefile.h
// TODO We need weak linkage here (OSX > 10.12, iOS > 10.0), otherwise compilation will fail on older versions
fn clonefile(src: *const c_char, dest: *const c_char, flags: c_int) -> c_int;
}
/// Clone `from` to `to` by calling `clonefile` with `CLONE_NOOWNERCOPY`.
///
/// Returns the last OS error when `clonefile` reports failure (a return value of `-1`),
/// or an error from `cstr` if either path cannot be converted to a C string.
pub(crate) fn reflink(from: &Path, to: &Path) -> io::Result<()> {
    let source = cstr(from)?;
    let target = cstr(to)?;
    // SAFETY: both pointers come from live `CString`s, so they are valid, NUL-terminated,
    // and outlive the call.
    #[allow(unsafe_code)]
    let status = unsafe { clonefile(source.as_ptr(), target.as_ptr(), CLONE_NOOWNERCOPY) };
    match status {
        -1 => Err(io::Error::last_os_error()),
        _ => Ok(()),
    }
}

View file

@ -3,6 +3,7 @@ use std::path::Path;
use anyhow::{Context, Result};
use colored::Colorize;
use install_wheel_rs::linker::LinkMode;
use itertools::Itertools;
use tracing::debug;
@ -27,6 +28,7 @@ use crate::requirements::RequirementsSource;
/// Install a set of locked requirements into the current Python environment.
pub(crate) async fn pip_sync(
sources: &[RequirementsSource],
link_mode: LinkMode,
cache: Option<&Path>,
mut printer: Printer,
) -> Result<ExitStatus> {
@ -42,12 +44,13 @@ pub(crate) async fn pip_sync(
return Ok(ExitStatus::Success);
}
sync_requirements(&requirements, cache, printer).await
sync_requirements(&requirements, link_mode, cache, printer).await
}
/// Install a set of locked requirements into the current Python environment.
pub(crate) async fn sync_requirements(
requirements: &[Requirement],
link_mode: LinkMode,
cache: Option<&Path>,
mut printer: Printer,
) -> Result<ExitStatus> {
@ -211,6 +214,7 @@ pub(crate) async fn sync_requirements(
if !wheels.is_empty() {
let start = std::time::Instant::now();
puffin_installer::Installer::new(&python)
.with_link_mode(link_mode)
.with_reporter(InstallReporter::from(printer).with_length(wheels.len() as u64))
.install(&wheels)?;

View file

@ -81,6 +81,10 @@ struct PipSyncArgs {
/// Include all packages listed in the given `requirements.txt` files.
#[clap(required(true))]
src_file: Vec<PathBuf>,
/// The method to use when installing packages from the global cache.
#[clap(long, value_enum)]
link_mode: Option<install_wheel_rs::linker::LinkMode>,
}
#[derive(Args)]
@ -174,7 +178,13 @@ async fn main() -> ExitCode {
.into_iter()
.map(RequirementsSource::from)
.collect::<Vec<_>>();
commands::pip_sync(&sources, cache_dir, printer).await
commands::pip_sync(
&sources,
args.link_mode.unwrap_or_default(),
cache_dir,
printer,
)
.await
}
Commands::PipUninstall(args) => {
let sources = args

View file

@ -47,7 +47,8 @@ fn missing_venv() -> Result<()> {
Ok(())
}
/// Install a package into a virtual environment.
/// Install a package into a virtual environment using the default link semantics. (On macOS,
/// this uses `clone` semantics.)
#[test]
fn install() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
@ -92,6 +93,100 @@ fn install() -> Result<()> {
Ok(())
}
/// Install a package into a virtual environment using copy semantics.
///
/// Runs `pip-sync` with `--link-mode copy`, snapshots the command output, and then checks
/// that `markupsafe` can be imported by the virtual environment's Python.
#[test]
fn install_copy() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
// Create the virtual environment.
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
// Pin a single requirement.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("MarkupSafe==2.1.3")?;
// Sync with `--link-mode copy`. Millisecond timings are replaced with `[TIME]` so the
// snapshot does not vary between runs.
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--link-mode")
.arg("copy")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
// The installed package must be importable from the virtual environment's interpreter.
Command::new(venv.join("bin").join("python"))
.arg("-c")
.arg("import markupsafe")
.current_dir(&temp_dir)
.assert()
.success();
Ok(())
}
/// Install a package into a virtual environment using hardlink semantics.
///
/// Runs `pip-sync` with `--link-mode hardlink`, snapshots the command output, and then
/// checks that `markupsafe` can be imported by the virtual environment's Python.
#[test]
fn install_hardlink() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
// Create the virtual environment.
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
// Pin a single requirement.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("MarkupSafe==2.1.3")?;
// Sync with `--link-mode hardlink`. Millisecond timings are replaced with `[TIME]` so the
// snapshot does not vary between runs.
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--link-mode")
.arg("hardlink")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
// The installed package must be importable from the virtual environment's interpreter.
Command::new(venv.join("bin").join("python"))
.arg("-c")
.arg("import markupsafe")
.current_dir(&temp_dir)
.assert()
.success();
Ok(())
}
/// Install multiple packages into a virtual environment.
#[test]
fn install_many() -> Result<()> {
@ -385,3 +480,63 @@ fn install_sequential() -> Result<()> {
Ok(())
}
/// Install a package into a virtual environment, then sync the same virtual environment
/// against a different pinned version of that package (`tomli==2.0.0`, then `tomli==2.0.1`).
#[test]
fn upgrade() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let cache_dir = assert_fs::TempDir::new()?;
let venv = temp_dir.child(".venv");
// Create the virtual environment.
Command::new(get_cargo_bin(BIN_NAME))
.arg("venv")
.arg(venv.as_os_str())
.arg("--cache-dir")
.arg(cache_dir.path())
.current_dir(&temp_dir)
.assert()
.success();
venv.assert(predicates::path::is_dir());
// First sync: pin the older version.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("tomli==2.0.0")?;
Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir)
.assert()
.success();
// Second sync: rewrite the requirements file with the newer pin.
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("tomli==2.0.1")?;
// Snapshot the second sync. Millisecond timings are replaced with `[TIME]` so the
// snapshot does not vary between runs.
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-sync")
.arg("requirements.txt")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
// The package must still be importable from the virtual environment's interpreter.
Command::new(venv.join("bin").join("python"))
.arg("-c")
.arg("import tomli")
.current_dir(&temp_dir)
.assert()
.success();
Ok(())
}

View file

@ -5,7 +5,10 @@ info:
args:
- pip-sync
- requirements.txt
- "--no-cache"
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpkNx6zh
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpvozar2/.venv
---
success: true
exit_code: 0

View file

@ -0,0 +1,25 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--link-mode"
- copy
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpqNznsn
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmp7lgVHK/.venv
---
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ markupsafe@2.1.3

View file

@ -0,0 +1,25 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--link-mode"
- hardlink
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpGHO8A6
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpBPXcHk/.venv
---
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Installed 1 package in [TIME]
+ markupsafe@2.1.3

View file

@ -0,0 +1,25 @@
---
source: crates/puffin-cli/tests/pip_sync.rs
info:
program: puffin
args:
- pip-sync
- requirements.txt
- "--cache-dir"
- /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpVgm17b
env:
VIRTUAL_ENV: /var/folders/nt/6gf2v7_s3k13zq_t3944rwz40000gn/T/.tmpDIXrmg/.venv
---
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Unzipped 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
- tomli@2.0.0
+ tomli@2.0.1

View file

@ -9,6 +9,7 @@ use crate::CachedDistribution;
pub struct Installer<'a> {
python: &'a PythonExecutable,
link_mode: install_wheel_rs::linker::LinkMode,
reporter: Option<Box<dyn Reporter>>,
}
@ -17,11 +18,18 @@ impl<'a> Installer<'a> {
pub fn new(python: &'a PythonExecutable) -> Self {
Self {
python,
link_mode: install_wheel_rs::linker::LinkMode::default(),
reporter: None,
}
}
/// Set the [`Reporter`] to use for this installer..
/// Set the [`LinkMode`](install_wheel_rs::linker::LinkMode) to use for this installer.
#[must_use]
pub fn with_link_mode(self, link_mode: install_wheel_rs::linker::LinkMode) -> Self {
Self { link_mode, ..self }
}
/// Set the [`Reporter`] to use for this installer.
#[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
Self {
@ -39,7 +47,7 @@ impl<'a> Installer<'a> {
self.python.simple_version(),
);
install_wheel_rs::unpacked::install_wheel(&location, wheel.path())?;
install_wheel_rs::linker::install_wheel(&location, wheel.path(), self.link_mode)?;
if let Some(reporter) = self.reporter.as_ref() {
reporter.on_install_progress(wheel.name(), wheel.version());