From adbee4fb32022a5cd1020fed3cd56b5b77bdaeb4 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 8 Oct 2023 17:44:02 -0400 Subject: [PATCH] Use recursive `clonefile` calls on macOS (#67) It turns out that on macOS, you can pass `clonefile` a directory to recursively copy an entire directory. This speeds up wheel installation dramatically, by about 3x. --- crates/install-wheel-rs/src/lib.rs | 2 ++ crates/install-wheel-rs/src/reflink.rs | 39 ++++++++++++++++++++++ crates/install-wheel-rs/src/unpacked.rs | 43 +++++++++++++++++++++---- 3 files changed, 77 insertions(+), 7 deletions(-) create mode 100644 crates/install-wheel-rs/src/reflink.rs diff --git a/crates/install-wheel-rs/src/lib.rs b/crates/install-wheel-rs/src/lib.rs index ddbc8cfc3..9685ec828 100644 --- a/crates/install-wheel-rs/src/lib.rs +++ b/crates/install-wheel-rs/src/lib.rs @@ -21,6 +21,8 @@ mod install_location; #[cfg(feature = "python_bindings")] mod python_bindings; mod record; +#[cfg(any(target_os = "macos", target_os = "ios"))] +mod reflink; mod script; pub mod unpacked; mod wheel; diff --git a/crates/install-wheel-rs/src/reflink.rs b/crates/install-wheel-rs/src/reflink.rs new file mode 100644 index 000000000..797718295 --- /dev/null +++ b/crates/install-wheel-rs/src/reflink.rs @@ -0,0 +1,39 @@ +//! Reflink a file on macOS via `clonefile`. + +use std::{ + ffi::CString, + io, + os::{ + raw::{c_char, c_int}, + unix::ffi::OsStrExt, + }, + path::Path, +}; + +fn cstr(path: &Path) -> io::Result { + Ok(CString::new(path.as_os_str().as_bytes())?) 
+} + +// const CLONE_NOFOLLOW: c_int = 0x0001; +const CLONE_NOOWNERCOPY: c_int = 0x0002; + +extern "C" { + // http://www.manpagez.com/man/2/clonefileat/ + // https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/sys/clonefile.h + // TODO We need weak linkage here (OSX > 10.12, iOS > 10.0), otherwise compilation will fail on older versions + fn clonefile(src: *const c_char, dest: *const c_char, flags: c_int) -> c_int; +} + +pub(crate) fn reflink(from: &Path, to: &Path) -> io::Result<()> { + let src = cstr(from)?; + let dest = cstr(to)?; + + #[allow(unsafe_code)] + let ret = unsafe { clonefile(src.as_ptr(), dest.as_ptr(), CLONE_NOOWNERCOPY) }; + + if ret == -1 { + Err(io::Error::last_os_error()) + } else { + Ok(()) + } +} diff --git a/crates/install-wheel-rs/src/unpacked.rs b/crates/install-wheel-rs/src/unpacked.rs index 749e4dd3b..87f4a82c9 100644 --- a/crates/install-wheel-rs/src/unpacked.rs +++ b/crates/install-wheel-rs/src/unpacked.rs @@ -9,7 +9,6 @@ use fs_err as fs; use fs_err::File; use mailparse::MailHeaderMap; use tracing::{debug, span, Level}; -use walkdir::WalkDir; use wheel_filename::WheelFilename; @@ -78,7 +77,7 @@ pub fn install_wheel( // We always install in the same virtualenv site packages debug!(name = name.as_str(), "Extracting file"); let num_unpacked = unpack_wheel_files(&site_packages, wheel)?; - debug!(name = name.as_str(), "Extracted {num_unpacked} files",); + debug!(name = name.as_str(), "Extracted {num_unpacked} files"); // Read the RECORD file. let mut record_file = File::open(&wheel.join(format!("{dist_info_prefix}.dist-info/RECORD")))?; @@ -244,16 +243,46 @@ fn parse_scripts( Ok((console_scripts, gui_scripts)) } +/// Extract all files from the wheel into the site packages. 
+#[cfg(any(target_os = "macos", target_os = "ios"))] +fn unpack_wheel_files(site_packages: &Path, wheel: &Path) -> Result { + use crate::reflink::reflink; + + let mut count = 0usize; + + // On macOS, directories can be recursively copied with a single `clonefile` call. + // So we only need to iterate over the top-level of the directory, and copy each file or + // subdirectory. + for entry in std::fs::read_dir(wheel)? { + let entry = entry?; + let from = entry.path(); + let to = site_packages.join(from.strip_prefix(wheel).unwrap()); + + // Delete the destination if it already exists. + if let Ok(metadata) = to.metadata() { + if metadata.is_dir() { + fs::remove_dir_all(&to)?; + } else if metadata.is_file() { + fs::remove_file(&to)?; + } + } + + // Copy the file. + reflink(&from, &to)?; + + count += 1; + } + + Ok(count) +} + /// Extract all files from the wheel into the site packages -/// -/// Matches with the RECORD entries -/// -/// Returns paths relative to site packages +#[cfg(not(any(target_os = "macos", target_os = "ios")))] fn unpack_wheel_files(site_packages: &Path, wheel: &Path) -> Result { let mut count = 0usize; // Walk over the directory. - for entry in WalkDir::new(wheel) { + for entry in walkdir::WalkDir::new(wheel) { let entry = entry?; let relative = entry.path().strip_prefix(wheel).unwrap(); let out_path = site_packages.join(relative);