diff --git a/.gitignore b/.gitignore index 73fab072c..8ed136775 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.venv + # Generated by Cargo # will have compiled files and executables debug/ diff --git a/Cargo.lock b/Cargo.lock index d5e88e68c..18ed30b45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1128,6 +1128,7 @@ dependencies = [ "plist", "pyo3", "rayon", + "reflink-copy", "regex", "rfc2047-decoder", "serde", @@ -1719,12 +1720,14 @@ dependencies = [ "install-wheel-rs", "puffin-client", "puffin-interpreter", + "rayon", "tempfile", "tokio", "tokio-util", "tracing", "url", "wheel-filename", + "zip", ] [[package]] @@ -1954,9 +1957,9 @@ dependencies = [ [[package]] name = "reflink-copy" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9e3947399fd46f412918bafde71ec68f9b3505f11ef082eeb80bc7fdf4d7caf" +checksum = "d7e3e017e993f86feeddf8a7fb609ca49f89082309e328e27aefd4a25bb317a4" dependencies = [ "cfg-if", "ioctl-sys", diff --git a/Cargo.toml b/Cargo.toml index 7129ba7aa..734110de3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,8 @@ memchr = { version = "2.6.4" } once_cell = { version = "1.18.0" } platform-info = { version = "2.0.2" } plist = { version = "1.5.0" } +rayon = { version = "1.8.0" } +reflink-copy = { version = "0.1.9" } regex = { version = "1.9.6" } reqwest = { version = "0.11.22", features = ["json", "gzip", "brotli", "stream"] } reqwest-middleware = { version = "0.2.3" } diff --git a/README.md b/README.md index 17f6f8b6a..6aab0a7e3 100644 --- a/README.md +++ b/README.md @@ -70,15 +70,7 @@ Options: ### Resolution -To compare a warm run of `puffin` to `pip-compile`: - -```shell -hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \ - "./target/release/puffin-cli compile requirements.txt" \ - "pip-compile requirements.txt -o /tmp/tmp.txt" -``` - -To compare a cold run of `puffin` to `pip-compile`: +To compare with a cold cache: ```shell hyperfine --runs 10 --warmup 3 
--prepare "rm -f /tmp/tmp.txt" \ @@ -86,28 +78,36 @@ hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \ "pip-compile requirements.txt --rebuild --pip-args '--no-cache-dir' -o /tmp/tmp.txt" ``` -### Installation - -To compare a warm run of `puffin` to `pip`: +To compare with a warm cache: ```shell -hyperfine --runs 10 --warmup 3 \ - "./target/release/puffin-cli sync requirements.txt --ignore-installed" \ - "pip install -r requirements.txt --ignore-installed --no-deps" +hyperfine --runs 10 --warmup 3 --prepare "rm -f /tmp/tmp.txt" \ + "./target/release/puffin-cli compile requirements.txt" \ + "pip-compile requirements.txt -o /tmp/tmp.txt" ``` -To compare a cold run of `puffin` to `pip`: +### Installation + +To compare with a cold cache: ```shell -hyperfine --runs 10 --warmup 3 \ +hyperfine --runs 10 --warmup 3 --prepare "rm -rf .venv && virtualenv .venv && source .venv/bin/activate" \ "./target/release/puffin-cli sync requirements.txt --ignore-installed --no-cache" \ "pip install -r requirements.txt --ignore-installed --no-cache-dir --no-deps" ``` -To compare a run in which all requirements are already installed: +To compare with a warm cache: ```shell -hyperfine --runs 10 --warmup 3 \ +hyperfine --runs 10 --warmup 3 --prepare "rm -rf .venv && virtualenv .venv && source .venv/bin/activate" \ + "./target/release/puffin-cli sync requirements.txt --ignore-installed" \ + "pip install -r requirements.txt --ignore-installed --no-deps" +``` + +To compare with all dependencies already installed: + +```shell +hyperfine --runs 10 --warmup 3 --prepare "rm -rf .venv && virtualenv .venv && source .venv/bin/activate" \ "./target/release/puffin-cli sync requirements.txt" \ "pip install -r requirements.txt --no-deps" ``` diff --git a/crates/install-wheel-rs/Cargo.toml b/crates/install-wheel-rs/Cargo.toml index 506c34a01..f1115c08b 100644 --- a/crates/install-wheel-rs/Cargo.toml +++ b/crates/install-wheel-rs/Cargo.toml @@ -28,6 +28,7 @@ platform-info = { workspace = true 
} plist = { workspace = true } pyo3 = { version = "0.19.2", features = ["extension-module", "abi3-py37"], optional = true } rayon = { version = "1.8.0", optional = true } +reflink-copy = { workspace = true } regex = { workspace = true } rfc2047-decoder = { workspace = true } serde = { workspace = true, features = ["derive"] } diff --git a/crates/install-wheel-rs/src/lib.rs b/crates/install-wheel-rs/src/lib.rs index c2f31dceb..ddbc8cfc3 100644 --- a/crates/install-wheel-rs/src/lib.rs +++ b/crates/install-wheel-rs/src/lib.rs @@ -1,21 +1,28 @@ //! Takes a wheel and installs it, either in a venv or for monotrail. use std::io; +use std::io::{Read, Seek}; use platform_info::PlatformInfoError; use thiserror::Error; use zip::result::ZipError; +use zip::ZipArchive; pub use install_location::{normalize_name, InstallLocation, LockedDir}; use platform_host::{Arch, Os}; +pub use record::RecordEntry; +pub use script::Script; pub use wheel::{ get_script_launcher, install_wheel, parse_key_value_file, read_record_file, relative_to, - Script, SHEBANG_PYTHON, + SHEBANG_PYTHON, }; mod install_location; #[cfg(feature = "python_bindings")] mod python_bindings; +mod record; +mod script; +pub mod unpacked; mod wheel; #[derive(Error, Debug)] @@ -65,3 +72,16 @@ impl Error { } } } + +/// Unpacks the zip archive behind `reader` into a freshly created temporary directory. +/// +/// Returns an error, instead of panicking, if the directory cannot be created or the archive cannot be opened or extracted. +pub fn do_thing(reader: impl Read + Seek) -> Result<(), Error> { + let temp_dir = tempfile::tempdir()?; + let mut archive = + ZipArchive::new(reader).map_err(|err| Error::from_zip_error("(index)".to_string(), err))?; + + archive.extract(temp_dir.path()).map_err(|err| Error::from_zip_error("(index)".to_string(), err))?; + + Ok(()) +} diff --git a/crates/install-wheel-rs/src/record.rs b/crates/install-wheel-rs/src/record.rs new file mode 100644 index 000000000..404cee531 --- /dev/null +++ b/crates/install-wheel-rs/src/record.rs @@ -0,0 +1,16 @@ +use serde::{Deserialize, Serialize}; + +/// Line in a RECORD file +/// +/// +/// ```csv +/// tqdm/cli.py,sha256=x_c8nmc4Huc-lKEsAXj78ZiyqSJ9hJ71j7vltY67icw,10509 +/// tqdm-4.62.3.dist-info/RECORD,, +/// ``` +#[derive(Deserialize, Serialize,
PartialOrd, PartialEq, Ord, Eq)] +pub struct RecordEntry { + pub path: String, + pub hash: Option<String>, + #[allow(dead_code)] + pub size: Option<usize>, // NOTE(review): type argument lost in extraction; `usize` assumed — confirm +} diff --git a/crates/install-wheel-rs/src/script.rs b/crates/install-wheel-rs/src/script.rs new file mode 100644 index 000000000..cc6f10953 --- /dev/null +++ b/crates/install-wheel-rs/src/script.rs @@ -0,0 +1,78 @@ +use std::collections::{HashMap, HashSet}; + +use regex::Regex; +use serde::Serialize; + +use crate::Error; + +/// Minimal `direct_url.json` schema +/// +/// +/// +#[derive(Serialize)] +struct DirectUrl { + #[allow(clippy::zero_sized_map_values)] + archive_info: HashMap<(), ()>, + url: String, +} + +/// A script defining the name of the runnable entrypoint and the module and function that should be +/// run. +#[cfg(feature = "python_bindings")] +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +#[pyo3::pyclass(dict)] +pub struct Script { + #[pyo3(get)] + pub script_name: String, + #[pyo3(get)] + pub module: String, + #[pyo3(get)] + pub function: String, +} + +/// A script defining the name of the runnable entrypoint and the module and function that should be +/// run.
+#[cfg(not(feature = "python_bindings"))] +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct Script { + pub script_name: String, + pub module: String, + pub function: String, +} + +impl Script { + /// Parses a script definition like `foo.bar:main` or `foomod:main_bar [bar,baz]` + /// + /// + /// + /// Extras are supposed to be ignored, which happens if you pass None for extras + pub fn from_value( + script_name: &str, + value: &str, + extras: Option<&[String]>, + ) -> Result<Option<Script>, Error> { + let script_regex = Regex::new(r"^(?P<module>[\w\d_\-.]+):(?P<function>[\w\d_\-.]+)(?:\s+\[(?P<extras>(?:[^,]+,?\s*)+)\])?$").unwrap(); + + let captures = script_regex + .captures(value) + .ok_or_else(|| Error::InvalidWheel(format!("invalid console script: '{value}'")))?; + if let Some(script_extras) = captures.name("extras") { + let script_extras = script_extras + .as_str() + .split(',') + .map(|extra| extra.trim().to_string()) + .collect::<HashSet<String>>(); + if let Some(extras) = extras { + if !script_extras.is_subset(&extras.iter().cloned().collect()) { + return Ok(None); + } + } + } + + Ok(Some(Script { + script_name: script_name.to_string(), + module: captures.name("module").unwrap().as_str().to_string(), + function: captures.name("function").unwrap().as_str().to_string(), + })) + } +} diff --git a/crates/install-wheel-rs/src/unpacked.rs b/crates/install-wheel-rs/src/unpacked.rs new file mode 100644 index 000000000..749e4dd3b --- /dev/null +++ b/crates/install-wheel-rs/src/unpacked.rs @@ -0,0 +1,285 @@ +//! Like `wheel.rs`, but for installing wheels that have already been unzipped, rather than +//! reading from a zip file.
+ +use std::io::Read; +use std::path::Path; + +use configparser::ini::Ini; +use fs_err as fs; +use fs_err::File; +use mailparse::MailHeaderMap; +use tracing::{debug, span, Level}; +use walkdir::WalkDir; + +use wheel_filename::WheelFilename; + +use crate::install_location::{InstallLocation, LockedDir}; +use crate::wheel::{ + extra_dist_info, install_data, parse_wheel_version, read_scripts_from_section, + write_script_entrypoints, +}; +use crate::{read_record_file, Error, Script}; + +/// Install the given wheel to the given venv +/// +/// The caller must ensure that the wheel is compatible with the environment. +/// +/// +/// +/// Wheel 1.0: +pub fn install_wheel( + location: &InstallLocation<LockedDir>, + wheel: &Path, + filename: &WheelFilename, +) -> Result<String, Error> { // NOTE(review): type arguments here and on `location` were lost in extraction and are reconstructed (`LockedDir` from the import, `String` from `filename.get_tag()`) — confirm + let name = &filename.distribution; + let _my_span = span!(Level::DEBUG, "install_wheel", name = name.as_str()); + + let InstallLocation::Venv { + venv_base: base_location, + .. + } = location + else { + return Err(Error::InvalidWheel( + "Monotrail installation is not supported yet".to_string(), + )); + }; + + // TODO(charlie): Pass this in. + let site_packages_python = format!( + "python{}.{}", + location.get_python_version().0, + location.get_python_version().1 + ); + let site_packages = if cfg!(target_os = "windows") { + base_location.join("Lib").join("site-packages") + } else { + base_location + .join("lib") + .join(site_packages_python) + .join("site-packages") + }; + + debug!(name = name.as_str(), "Getting wheel metadata"); + let dist_info_prefix = find_dist_info(wheel)?; + let (name, _version) = read_metadata(&dist_info_prefix, wheel)?; + // TODO: Check that name and version match + + // We're going step by step through + // https://packaging.python.org/en/latest/specifications/binary-distribution-format/#installing-a-wheel-distribution-1-0-py32-none-any-whl + // > 1.a Parse distribution-1.0.dist-info/WHEEL. + // > 1.b Check that installer is compatible with Wheel-Version.
Warn if minor version is greater, abort if major version is greater. + let wheel_file_path = wheel.join(format!("{dist_info_prefix}.dist-info/WHEEL")); + let wheel_text = std::fs::read_to_string(&wheel_file_path)?; + parse_wheel_version(&wheel_text)?; + + // > 1.c If Root-Is-Purelib == ‘true’, unpack archive into purelib (site-packages). + // > 1.d Else unpack archive into platlib (site-packages). + // We always install in the same virtualenv site packages + debug!(name = name.as_str(), "Extracting file"); + let num_unpacked = unpack_wheel_files(&site_packages, wheel)?; + debug!(name = name.as_str(), "Extracted {num_unpacked} files",); + + // Read the RECORD file. + let mut record_file = File::open(&wheel.join(format!("{dist_info_prefix}.dist-info/RECORD")))?; + let mut record = read_record_file(&mut record_file)?; + + debug!(name = name.as_str(), "Writing entrypoints"); + let (console_scripts, gui_scripts) = parse_scripts(wheel, &dist_info_prefix, None)?; + write_script_entrypoints(&site_packages, location, &console_scripts, &mut record)?; + write_script_entrypoints(&site_packages, location, &gui_scripts, &mut record)?; + + let data_dir = site_packages.join(format!("{dist_info_prefix}.data")); + // 2.a Unpacked archive includes distribution-1.0.dist-info/ and (if there is data) distribution-1.0.data/. + // 2.b Move each subtree of distribution-1.0.data/ onto its destination path. Each subdirectory of distribution-1.0.data/ is a key into a dict of destination directories, such as distribution-1.0.data/(purelib|platlib|headers|scripts|data). The initially supported paths are taken from distutils.command.install. + if data_dir.is_dir() { + debug!(name = name.as_str(), "Installing data"); + install_data( + base_location, + &site_packages, + &data_dir, + &name, + location, + &console_scripts, + &gui_scripts, + &mut record, + )?; + // 2.c If applicable, update scripts starting with #!python to point to the correct interpreter.
+ // Scripts are unsupported through data + // 2.e Remove empty distribution-1.0.data directory. + fs::remove_dir_all(data_dir)?; + } else { + debug!(name = name.as_str(), "No data"); + } + + debug!(name = name.as_str(), "Writing extra metadata"); + + extra_dist_info(&site_packages, &dist_info_prefix, true, &mut record)?; + + debug!(name = name.as_str(), "Writing record"); + let mut record_writer = csv::WriterBuilder::new() + .has_headers(false) + .escape(b'"') + .from_path(site_packages.join(format!("{dist_info_prefix}.dist-info/RECORD")))?; + record.sort(); + for entry in record { + record_writer.serialize(entry)?; + } + + Ok(filename.get_tag()) +} + +/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or +/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase. +/// Either way, we just search the wheel for the name +/// +/// +fn find_dist_info(path: &Path) -> Result<String, Error> { + // Iterate over `path` to find the `.dist-info` directory. It should be at the top-level. + let Some(dist_info) = std::fs::read_dir(path)?.find_map(|entry| { + let entry = entry.ok()?; + let path = entry.path(); + if path.is_dir() { + if path.extension().map_or(false, |ext| ext == "dist-info") { + Some(path) + } else { + None + } + } else { + None + } + }) else { + return Err(Error::InvalidWheel( + "Missing .dist-info directory".to_string(), + )); + }; + + let Some(dist_info_prefix) = dist_info.file_stem() else { + return Err(Error::InvalidWheel( + "Missing .dist-info directory".to_string(), + )); + }; + + Ok(dist_info_prefix.to_string_lossy().to_string()) +} + +/// Reads the `Name` and `Version` headers from `{dist_info_prefix}.dist-info/METADATA` under `wheel`. +fn read_metadata(dist_info_prefix: &str, wheel: &Path) -> Result<(String, String), Error> { + let metadata_file = wheel.join(format!("{dist_info_prefix}.dist-info/METADATA")); + + // Read into a buffer.
+ let mut content = Vec::new(); + File::open(&metadata_file)?.read_to_end(&mut content)?; + + // HACK: trick mailparse to parse as UTF-8 instead of ASCII + let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec(); + mail.extend_from_slice(&content); + let msg = mailparse::parse_mail(&mail).map_err(|err| { + Error::InvalidWheel(format!("Invalid {}: {}", metadata_file.display(), err)) + })?; + let headers = msg.get_headers(); + let metadata_version = + headers + .get_first_value("Metadata-Version") + .ok_or_else(|| Error::InvalidWheel(format!( + "No Metadata-Version field in {}", + metadata_file.display() + )))?; + // Crude but it should do https://packaging.python.org/en/latest/specifications/core-metadata/#metadata-version + // At time of writing: + // > Version of the file format; legal values are “1.0”, “1.1”, “1.2”, “2.1”, “2.2”, and “2.3”. + if !(metadata_version.starts_with("1.") || metadata_version.starts_with("2.")) { + return Err(Error::InvalidWheel(format!( + "Metadata-Version field has unsupported value {metadata_version}" + ))); + } + let name = headers + .get_first_value("Name") + .ok_or_else(|| Error::InvalidWheel(format!( + "No Name field in {}", + metadata_file.display() + )))?; + let version = headers + .get_first_value("Version") + .ok_or_else(|| Error::InvalidWheel(format!( + "No Version field in {}", + metadata_file.display() + )))?; + Ok((name, version)) +} + +/// Parses the `entry_points.txt` entry in the wheel for console scripts +/// +/// Returns (`script_name`, module, function) +/// +/// Extras are supposed to be ignored, which happens if you pass None for extras +fn parse_scripts( + wheel: &Path, + dist_info_prefix: &str, + extras: Option<&[String]>, +) -> Result<(Vec