Use recursive clonefile calls on macOS (#67)

It turns out that on macOS, you can pass `clonefile` a directory to
recursively copy an entire directory. This speeds up wheel installation
dramatically, by about 3x.
This commit is contained in:
Charlie Marsh 2023-10-08 17:44:02 -04:00 committed by GitHub
parent 1c942ab8fe
commit adbee4fb32
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 77 additions and 7 deletions

View file

@ -21,6 +21,8 @@ mod install_location;
#[cfg(feature = "python_bindings")]
mod python_bindings;
mod record;
#[cfg(any(target_os = "macos", target_os = "ios"))]
mod reflink;
mod script;
pub mod unpacked;
mod wheel;

View file

@ -0,0 +1,39 @@
//! Reflink a file on macOS via `clonefile`.
use std::{
ffi::CString,
io,
os::{
raw::{c_char, c_int},
unix::ffi::OsStrExt,
},
path::Path,
};
/// Converts a filesystem path into a NUL-terminated C string suitable for FFI.
///
/// # Errors
///
/// Returns an [`io::Error`] if the path contains an interior NUL byte.
fn cstr(path: &Path) -> io::Result<CString> {
    let raw_bytes = path.as_os_str().as_bytes();
    CString::new(raw_bytes).map_err(io::Error::from)
}
// const CLONE_NOFOLLOW: c_int = 0x0001;
const CLONE_NOOWNERCOPY: c_int = 0x0002;
// Foreign declaration of the `clonefile` system call used by `reflink` below.
extern "C" {
// Reference documentation for the call and its flags:
// http://www.manpagez.com/man/2/clonefileat/
// https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/bsd/sys/clonefile.h
// TODO We need weak linkage here (OSX > 10.12, iOS > 10.0), otherwise compilation will fail on older versions
//
// Arguments: `src` and `dest` are NUL-terminated C path strings; `flags` is a
// bitmask of `CLONE_*` values. The caller in this file treats a return value
// of -1 as failure and reads the error via `io::Error::last_os_error()`.
fn clonefile(src: *const c_char, dest: *const c_char, flags: c_int) -> c_int;
}
/// Clones `from` to `to` with a single `clonefile` call, passing the
/// `CLONE_NOOWNERCOPY` flag.
///
/// # Errors
///
/// Returns an [`io::Error`] if either path contains an interior NUL byte, or
/// the last OS error if `clonefile` reports failure by returning `-1`.
pub(crate) fn reflink(from: &Path, to: &Path) -> io::Result<()> {
    let source = cstr(from)?;
    let target = cstr(to)?;

    // SAFETY: both pointers come from live `CString`s, so they are valid,
    // NUL-terminated, and outlive the call.
    #[allow(unsafe_code)]
    let status = unsafe { clonefile(source.as_ptr(), target.as_ptr(), CLONE_NOOWNERCOPY) };

    match status {
        -1 => Err(io::Error::last_os_error()),
        _ => Ok(()),
    }
}

View file

@ -9,7 +9,6 @@ use fs_err as fs;
use fs_err::File;
use mailparse::MailHeaderMap;
use tracing::{debug, span, Level};
use walkdir::WalkDir;
use wheel_filename::WheelFilename;
@ -78,7 +77,7 @@ pub fn install_wheel(
// We always install in the same virtualenv site packages
debug!(name = name.as_str(), "Extracting file");
let num_unpacked = unpack_wheel_files(&site_packages, wheel)?;
debug!(name = name.as_str(), "Extracted {num_unpacked} files",);
debug!(name = name.as_str(), "Extracted {num_unpacked} files");
// Read the RECORD file.
let mut record_file = File::open(&wheel.join(format!("{dist_info_prefix}.dist-info/RECORD")))?;
@ -244,16 +243,46 @@ fn parse_scripts(
Ok((console_scripts, gui_scripts))
}
/// Extract all files from the wheel into the site packages.
///
/// Every top-level entry of `wheel` (file or directory) is cloned into
/// `site_packages` under the same name with a single `reflink` call. Anything
/// already present at the destination is removed first.
///
/// Returns the number of top-level entries cloned. Note that this is not the
/// total number of files, since a cloned directory counts once.
///
/// # Errors
///
/// Fails if the wheel directory cannot be read, if an existing destination
/// cannot be removed, or if cloning an entry fails.
#[cfg(any(target_os = "macos", target_os = "ios"))]
fn unpack_wheel_files(site_packages: &Path, wheel: &Path) -> Result<usize, Error> {
    use crate::reflink::reflink;

    let mut count = 0usize;

    // On macOS, a directory can be recursively copied with a single `clonefile` call.
    // So we only need to iterate over the top-level of the directory, and copy each file or
    // subdirectory.
    for entry in std::fs::read_dir(wheel)? {
        let entry = entry?;
        let from = entry.path();
        // `read_dir` yields direct children, so the file name alone is the relative path.
        let to = site_packages.join(entry.file_name());

        // Delete the destination if it already exists. `symlink_metadata` does not follow
        // symlinks, so a dangling symlink (or any other non-regular entry) is also detected
        // and removed instead of being left behind to make the clone fail.
        if let Ok(metadata) = to.symlink_metadata() {
            if metadata.is_dir() {
                fs::remove_dir_all(&to)?;
            } else {
                fs::remove_file(&to)?;
            }
        }

        // Copy the file or directory.
        reflink(&from, &to)?;
        count += 1;
    }

    Ok(count)
}
/// Extract all files from the wheel into the site packages
///
/// Matches with the RECORD entries
///
/// Returns paths relative to site packages
#[cfg(not(any(target_os = "macos", target_os = "ios")))]
fn unpack_wheel_files(site_packages: &Path, wheel: &Path) -> Result<usize, Error> {
let mut count = 0usize;
// Walk over the directory.
for entry in WalkDir::new(wheel) {
for entry in walkdir::WalkDir::new(wheel) {
let entry = entry?;
let relative = entry.path().strip_prefix(wheel).unwrap();
let out_path = site_packages.join(relative);