From d9bcfafa165adb1b1e12927ea37914bbd6a50c78 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 6 Nov 2023 09:09:28 -0800 Subject: [PATCH] Write `direct_url.json` in wheel installer (#337) ## Summary This PR just adds the logic in `install-wheel-rs` to write `direct_url.json`. We're not actually taking advantage of it yet (or wiring it through) in Puffin. Part of https://github.com/astral-sh/puffin/issues/332. --- Cargo.lock | 2 + crates/gourgeist/src/packages.rs | 5 +- crates/install-wheel-rs/Cargo.toml | 2 + crates/install-wheel-rs/src/direct_url.rs | 59 +++++++++++++++++++ crates/install-wheel-rs/src/lib.rs | 4 ++ crates/install-wheel-rs/src/linker.rs | 12 +++- crates/install-wheel-rs/src/main.rs | 1 + .../install-wheel-rs/src/python_bindings.rs | 1 + crates/install-wheel-rs/src/wheel.rs | 26 +++++--- crates/puffin-installer/src/builder.rs | 2 +- crates/puffin-installer/src/downloader.rs | 5 +- crates/puffin-installer/src/installer.rs | 9 ++- crates/puffin-resolver/src/resolver.rs | 2 +- 13 files changed, 110 insertions(+), 20 deletions(-) create mode 100644 crates/install-wheel-rs/src/direct_url.rs diff --git a/Cargo.lock b/Cargo.lock index b2eefb712..9c0fc06bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1449,12 +1449,14 @@ dependencies = [ "reflink-copy", "regex", "serde", + "serde_json", "sha2", "target-lexicon", "tempfile", "thiserror", "tracing", "tracing-subscriber", + "url", "walkdir", "zip", ] diff --git a/crates/gourgeist/src/packages.rs b/crates/gourgeist/src/packages.rs index c656296b2..75db4eb03 100644 --- a/crates/gourgeist/src/packages.rs +++ b/crates/gourgeist/src/packages.rs @@ -54,8 +54,8 @@ pub(crate) fn install_base_packages( let install_location = InstallLocation::new(location.canonicalize()?, info.simple_version()); let install_location = install_location.acquire_lock()?; - // TODO: Use the json api instead - // TODO: Only check the json API so often (monthly? daily?) 
+ // TODO(konstin): Use the json api instead + // TODO(konstin): Only check the json API so often (monthly? daily?) let packages = [ ("pip-23.2.1-py3-none-any.whl", "https://files.pythonhosted.org/packages/50/c2/e06851e8cc28dcad7c155f4753da8833ac06a5c704c109313b8d5a62968a/pip-23.2.1-py3-none-any.whl"), ("setuptools-68.2.2-py3-none-any.whl", "https://files.pythonhosted.org/packages/bb/26/7945080113158354380a12ce26873dd6c1ebd88d47f5bc24e2c5bb38c16a/setuptools-68.2.2-py3-none-any.whl"), @@ -73,6 +73,7 @@ pub(crate) fn install_base_packages( &install_location, File::open(wheel_file)?, &parsed_filename, + None, false, false, &[], diff --git a/crates/install-wheel-rs/Cargo.toml b/crates/install-wheel-rs/Cargo.toml index 7b02bf577..d7f206fdf 100644 --- a/crates/install-wheel-rs/Cargo.toml +++ b/crates/install-wheel-rs/Cargo.toml @@ -37,12 +37,14 @@ rayon = { version = "1.8.0", optional = true } reflink-copy = { workspace = true } regex = { workspace = true } serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } sha2 = { workspace = true } target-lexicon = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true, optional = true } +url = { workspace = true } walkdir = { workspace = true } zip = { workspace = true } diff --git a/crates/install-wheel-rs/src/direct_url.rs b/crates/install-wheel-rs/src/direct_url.rs new file mode 100644 index 000000000..2b3ccf51a --- /dev/null +++ b/crates/install-wheel-rs/src/direct_url.rs @@ -0,0 +1,59 @@ +use std::collections::HashMap; +use std::path::PathBuf; + +use serde::{Deserialize, Serialize}; + +/// Metadata for a distribution that was installed via a direct URL. +/// +/// See: <https://packaging.python.org/en/latest/specifications/direct-url-data-structure/> +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DirectUrl { + /// The direct URL is a path to an archive. 
For example: + /// ```json + /// {"archive_info": {"hash": "sha256=75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8", "hashes": {"sha256": "75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}}, "url": "https://files.pythonhosted.org/packages/b8/8b/31273bf66016be6ad22bb7345c37ff350276cfd46e389a0c2ac5da9d9073/wheel-0.41.2-py3-none-any.whl"} + /// ``` + ArchiveUrl { + url: String, + archive_info: ArchiveInfo, + #[serde(skip_serializing_if = "Option::is_none")] + subdirectory: Option, + }, + /// The direct URL is a path to a VCS repository. For example: + /// ```json + /// {"url": "https://github.com/pallets/flask.git", "vcs_info": {"commit_id": "8d9519df093864ff90ca446d4af2dc8facd3c542", "vcs": "git"}} + /// ``` + VcsUrl { + url: String, + vcs_info: VcsInfo, + #[serde(skip_serializing_if = "Option::is_none")] + subdirectory: Option, + }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct ArchiveInfo { + #[serde(skip_serializing_if = "Option::is_none")] + pub hash: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub hashes: Option>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct VcsInfo { + pub vcs: VcsKind, + pub commit_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub requested_revision: Option, +} + +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum VcsKind { + Git, + Hg, + Bzr, + Svn, +} diff --git a/crates/install-wheel-rs/src/lib.rs b/crates/install-wheel-rs/src/lib.rs index 6615d8341..c49f4b774 100644 --- a/crates/install-wheel-rs/src/lib.rs +++ b/crates/install-wheel-rs/src/lib.rs @@ -7,6 +7,7 @@ use platform_info::PlatformInfoError; use thiserror::Error; use zip::result::ZipError; +pub use direct_url::DirectUrl; pub use install_location::{normalize_name, InstallLocation, LockedDir}; use platform_host::{Arch, Os}; pub use record::RecordEntry; 
@@ -17,6 +18,7 @@ pub use wheel::{ relative_to, SHEBANG_PYTHON, }; +mod direct_url; mod install_location; pub mod linker; #[cfg(feature = "python_bindings")] @@ -60,6 +62,8 @@ pub enum Error { PlatformInfo(#[source] PlatformInfoError), #[error("Invalid version specification, only none or == is supported")] Pep440, + #[error("Invalid direct_url.json")] + DirectUrlJson(#[from] serde_json::Error), } impl Error { diff --git a/crates/install-wheel-rs/src/linker.rs b/crates/install-wheel-rs/src/linker.rs index 306906ab5..ad487c265 100644 --- a/crates/install-wheel-rs/src/linker.rs +++ b/crates/install-wheel-rs/src/linker.rs @@ -15,7 +15,7 @@ use crate::wheel::{ extra_dist_info, install_data, parse_wheel_version, read_scripts_from_section, write_script_entrypoints, }; -use crate::{read_record_file, Error, Script}; +use crate::{read_record_file, DirectUrl, Error, Script}; /// Install the given wheel to the given venv /// @@ -27,6 +27,7 @@ use crate::{read_record_file, Error, Script}; pub fn install_wheel( location: &InstallLocation>, wheel: impl AsRef, + direct_url: Option<&DirectUrl>, link_mode: LinkMode, ) -> Result<(), Error> { let root = location.venv_root(); @@ -105,8 +106,13 @@ pub fn install_wheel( } debug!(name, "Writing extra metadata"); - - extra_dist_info(&site_packages, &dist_info_prefix, true, &mut record)?; + extra_dist_info( + &site_packages, + &dist_info_prefix, + true, + direct_url, + &mut record, + )?; debug!(name, "Writing record"); let mut record_writer = csv::WriterBuilder::new() diff --git a/crates/install-wheel-rs/src/main.rs b/crates/install-wheel-rs/src/main.rs index 50ef7f5e2..293121b10 100644 --- a/crates/install-wheel-rs/src/main.rs +++ b/crates/install-wheel-rs/src/main.rs @@ -65,6 +65,7 @@ fn main() -> Result<(), Error> { &locked_dir, File::open(wheel)?, &filename, + None, args.compile, !args.skip_hashes, &[], diff --git a/crates/install-wheel-rs/src/python_bindings.rs b/crates/install-wheel-rs/src/python_bindings.rs index 5982c3efb..7800da6ea 
100644 --- a/crates/install-wheel-rs/src/python_bindings.rs +++ b/crates/install-wheel-rs/src/python_bindings.rs @@ -62,6 +62,7 @@ impl LockedVenv { &self.location, File::open(wheel)?, &filename, + None, true, true, &[], diff --git a/crates/install-wheel-rs/src/wheel.rs b/crates/install-wheel-rs/src/wheel.rs index 384b024ab..8c81e3b25 100644 --- a/crates/install-wheel-rs/src/wheel.rs +++ b/crates/install-wheel-rs/src/wheel.rs @@ -14,6 +14,7 @@ use mailparse::MailHeaderMap; use sha2::{Digest, Sha256}; use tempfile::tempdir; use tracing::{debug, error, span, warn, Level}; + use walkdir::WalkDir; use zip::result::ZipError; use zip::write::FileOptions; @@ -24,7 +25,7 @@ use distribution_filename::WheelFilename; use crate::install_location::{InstallLocation, LockedDir}; use crate::record::RecordEntry; use crate::script::Script; -use crate::Error; +use crate::{DirectUrl, Error}; /// `#!/usr/bin/env python` pub const SHEBANG_PYTHON: &str = "#!/usr/bin/env python"; @@ -810,28 +811,32 @@ pub(crate) fn write_file_recorded( Ok(()) } -/// Adds INSTALLER, REQUESTED and `direct_url.json` to the .dist-info dir +/// Adds `INSTALLER`, `REQUESTED` and `direct_url.json` to the .dist-info dir pub(crate) fn extra_dist_info( site_packages: &Path, dist_info_prefix: &str, requested: bool, + direct_url: Option<&DirectUrl>, record: &mut Vec, ) -> Result<(), Error> { + let dist_info_dir = PathBuf::from(format!("{dist_info_prefix}.dist-info")); write_file_recorded( site_packages, - &PathBuf::from(format!("{dist_info_prefix}.dist-info")).join("INSTALLER"), + &dist_info_dir.join("INSTALLER"), env!("CARGO_PKG_NAME"), record, )?; if requested { + write_file_recorded(site_packages, &dist_info_dir.join("REQUESTED"), "", record)?; + } + if let Some(direct_url) = direct_url { write_file_recorded( site_packages, - &PathBuf::from(format!("{dist_info_prefix}.dist-info")).join("REQUESTED"), - "", + &dist_info_dir.join("direct_url.json"), + serde_json::to_string(direct_url)?.as_bytes(), record, )?; } - 
Ok(()) } @@ -891,6 +896,7 @@ pub fn install_wheel( location: &InstallLocation, reader: impl Read + Seek, filename: &WheelFilename, + direct_url: Option<&DirectUrl>, compile: bool, check_hashes: bool, // initially used to the console scripts, currently unused. Keeping it because we likely need @@ -1006,7 +1012,13 @@ pub fn install_wheel( debug!(name = name.as_str(), "Writing extra metadata"); - extra_dist_info(&site_packages, &dist_info_prefix, true, &mut record)?; + extra_dist_info( + &site_packages, + &dist_info_prefix, + true, + direct_url, + &mut record, + )?; debug!(name = name.as_str(), "Writing record"); let mut record_writer = csv::WriterBuilder::new() diff --git a/crates/puffin-installer/src/builder.rs b/crates/puffin-installer/src/builder.rs index b982f4e23..b5b73c58e 100644 --- a/crates/puffin-installer/src/builder.rs +++ b/crates/puffin-installer/src/builder.rs @@ -39,7 +39,7 @@ impl<'a, T: BuildContext + Send + Sync> Builder<'a, T> { } /// Build a set of source distributions. - pub async fn build(&'a self, distributions: Vec) -> Result> { + pub async fn build(&self, distributions: Vec) -> Result> { // Sort the distributions by size. let mut distributions = distributions; distributions.sort_unstable_by_key(|distribution| match &distribution.remote { diff --git a/crates/puffin-installer/src/downloader.rs b/crates/puffin-installer/src/downloader.rs index 672f95caf..0d3d391bc 100644 --- a/crates/puffin-installer/src/downloader.rs +++ b/crates/puffin-installer/src/downloader.rs @@ -46,10 +46,7 @@ impl<'a> Downloader<'a> { } /// Download a set of distributions. - pub async fn download( - &'a self, - distributions: Vec, - ) -> Result> { + pub async fn download(&self, distributions: Vec) -> Result> { // Sort the distributions by size. 
let mut distributions = distributions; distributions.sort_unstable_by_key(|wheel| match wheel { diff --git a/crates/puffin-installer/src/installer.rs b/crates/puffin-installer/src/installer.rs index 00433bc7f..3500feaed 100644 --- a/crates/puffin-installer/src/installer.rs +++ b/crates/puffin-installer/src/installer.rs @@ -44,8 +44,13 @@ impl<'a> Installer<'a> { self.venv.interpreter_info().simple_version(), ); - install_wheel_rs::linker::install_wheel(&location, wheel.path(), self.link_mode) - .with_context(|| format!("Failed to install: {wheel}"))?; + install_wheel_rs::linker::install_wheel( + &location, + wheel.path(), + None, + self.link_mode, + ) + .with_context(|| format!("Failed to install: {wheel}"))?; if let Some(reporter) = self.reporter.as_ref() { reporter.on_install_progress(wheel); diff --git a/crates/puffin-resolver/src/resolver.rs b/crates/puffin-resolver/src/resolver.rs index 09668aa76..c860789c3 100644 --- a/crates/puffin-resolver/src/resolver.rs +++ b/crates/puffin-resolver/src/resolver.rs @@ -615,7 +615,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> { Ok::<(), ResolveError>(()) } - async fn process_request(&'a self, request: Request) -> Result { + async fn process_request(&self, request: Request) -> Result { match request { // Fetch package metadata from the registry. Request::Package(package_name) => {