From fec4ee2848094359ec8f56e9e94c2815490a59dd Mon Sep 17 00:00:00 2001 From: konsti Date: Wed, 18 Oct 2023 14:48:30 +0200 Subject: [PATCH] Support prepare_metadata_for_build_wheel (#106) Support calling `prepare_metadata_for_build_wheel`, which can give you the metadata without executing the actual build if the backend supports it. This makes the code a lot uglier since we effectively have a state machine: * Setup: Either venv plus requires (PEP 517) or just a venv (setup.py) * Get metadata (optional step): None (setup.py) or `prepare_metadata_for_build_wheel` and saving that result * Build: `setup.py`, `build_wheel()` or `build_wheel(metadata_directory=metadata_directory)`, but i think i got general flow right. @charliermarsh This is a "barely works but unblocks building on top" implementation, say if you want more polishing (i'll look at this again tomorrow) --- crates/puffin-build/src/lib.rs | 553 +++++++++++++++++++++----------- crates/puffin-build/src/main.rs | 5 +- crates/puffin-build/test.sh | 3 + 3 files changed, 368 insertions(+), 193 deletions(-) diff --git a/crates/puffin-build/src/lib.rs b/crates/puffin-build/src/lib.rs index 0e4c7e653..66e84eb53 100644 --- a/crates/puffin-build/src/lib.rs +++ b/crates/puffin-build/src/lib.rs @@ -11,11 +11,12 @@ use indoc::formatdoc; use pep508_rs::Requirement; use pyproject_toml::PyProjectToml; use std::io; +use std::io::BufRead; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::process::{Command, Output}; use tar::Archive; -use tempfile::tempdir; +use tempfile::TempDir; use thiserror::Error; use tracing::{debug, instrument}; use zip::ZipArchive; @@ -31,7 +32,7 @@ pub enum Error { #[error("Invalid source distribution: {0}")] InvalidSourceDistribution(String), #[error("Invalid pyproject.toml")] - PyprojectTomlInvalid(#[from] toml::de::Error), + InvalidPyprojectToml(#[from] toml::de::Error), #[error("Failed to install requirements")] RequirementsInstall(#[source] anyhow::Error), #[error("Failed to create temporary virtual environment")] @@ -56,6 +57,347 @@ impl Error { } } +/// `[build-backend]` from pyproject.toml +struct Pep517Backend { + /// The build backend string such as `setuptools.build_meta:__legacy__` or `maturin` from + /// `build-backend.backend` in pyproject.toml + backend: String, + /// `build-backend.requirements` in pyproject.toml + requirements: Vec, +} + +impl Pep517Backend { + fn backend_import(&self) -> String { + if let Some((path, object)) = self.backend.split_once(':') { + format!("from {path} import {object}") + } else { + format!("import {}", self.backend) + } + } +} + +/// Holds the state through a series of PEP 517 frontend to backend calls or a single setup.py +/// invocation. +/// +/// This keeps both the temp dir and the result of a potential `prepare_metadata_for_build_wheel` +/// call which changes how we call `build_wheel`. +pub struct SourceDistributionBuilder { + temp_dir: TempDir, + source_tree: PathBuf, + /// `Some` if this is a PEP 517 build + pep517_backend: Option, + venv: Venv, + /// Populated if `prepare_metadata_for_build_wheel` was called. + /// + /// > If the build frontend has previously called prepare_metadata_for_build_wheel and depends + /// > on the wheel resulting from this call to have metadata matching this earlier call, then + /// > it should provide the path to the created .dist-info directory as the metadata_directory + /// > argument. If this argument is provided, then build_wheel MUST produce a wheel with + /// > identical metadata. The directory passed in by the build frontend MUST be identical to the + /// > directory created by prepare_metadata_for_build_wheel, including any unrecognized files + /// > it created. + metadata_directory: Option, +} + +impl SourceDistributionBuilder { + /// Extract the source distribution and create a venv with the required packages + pub fn setup( + sdist: &Path, + base_python: &Path, + interpreter_info: &InterpreterInfo, + ) -> Result { + let temp_dir = TempDir::new()?; + + // TODO(konstin): Parse and verify filenames + debug!("Unpacking for build {}", sdist.display()); + let extracted = temp_dir.path().join("extracted"); + let source_tree = extract_archive(sdist, &extracted)?; + + // Check if we have a PEP 517 build, otherwise we'll fall back to setup.py + let mut pep517 = None; + if source_tree.join("pyproject.toml").is_file() { + let pyproject_toml: PyProjectToml = + toml::from_str(&fs::read_to_string(source_tree.join("pyproject.toml"))?) + .map_err(Error::InvalidPyprojectToml)?; + // > If the pyproject.toml file is absent, or the build-backend key is missing, the + // > source tree is not using this specification, and tools should revert to the legacy + // > behaviour of running setup.py (either directly, or by implicitly invoking the + // > setuptools.build_meta:__legacy__ backend). + if let Some(backend) = pyproject_toml.build_system.build_backend { + pep517 = Some(Pep517Backend { + backend, + requirements: pyproject_toml.build_system.requires, + }); + }; + if pyproject_toml.build_system.backend_path.is_some() { + todo!("backend-path is not supported yet") + } + } + + let venv = if let Some(pep517_backend) = &pep517 { + create_pep517_build_environment( + temp_dir.path(), + &source_tree, + base_python, + interpreter_info, + pep517_backend, + )? + } else { + if !source_tree.join("setup.py").is_file() { + return Err(Error::InvalidSourceDistribution( + "The archive contains neither a pyproject.toml or a setup.py at the top level" + .to_string(), + )); + } + gourgeist::create_venv( + temp_dir.path().join("venv"), + base_python, + interpreter_info, + false, + )? + }; + + Ok(Self { + temp_dir, + source_tree, + pep517_backend: pep517, + venv, + metadata_directory: None, + }) + } + + /// Try calling `prepare_metadata_for_build_wheel` to get the metadata without executing the + /// actual build + /// + /// TODO(konstin): Return the actual metadata instead of the dist-info dir + pub fn get_metadata_without_build(&mut self) -> Result, Error> { + // setup.py builds don't support this + let Some(pep517_backend) = &self.pep517_backend else { + return Ok(None); + }; + + let metadata_directory = self.temp_dir.path().join("metadata_directory"); + fs::create_dir(&metadata_directory)?; + + debug!( + "Calling `{}.prepare_metadata_for_build_wheel()`", + pep517_backend.backend + ); + let script = formatdoc! { + r#"{} as backend + import json + + if get_requires_for_build_wheel := getattr(backend, "prepare_metadata_for_build_wheel", None): + print(get_requires_for_build_wheel("{}")) + else: + print() + "#, pep517_backend.backend_import(), escape_path_for_python(&metadata_directory) + }; + let output = + run_python_script(&self.venv.python_interpreter(), &script, &self.source_tree)?; + if !output.status.success() { + return Err(Error::from_command_output( + "Build backend failed to determine metadata through `prepare_metadata_for_build_wheel`".to_string(), + &output, + )); + } + let message = output + .stdout + .lines() + .last() + // flatten is nightly only :/ + .transpose() + .map_err(|err| err.to_string()) + .and_then(|last_line| last_line.ok_or("Missing message".to_string())) + .map_err(|err| { + Error::from_command_output( + format!( + "Build backend failed to return metadata directory with \ + `prepare_metadata_for_build_wheel`: {err}" + ), + &output, + ) + })?; + if message.is_empty() { + return Ok(None); + } + self.metadata_directory = Some(metadata_directory.join(message)); + return Ok(self.metadata_directory.as_deref()); + } + + /// Build a source distribution from an archive (`.zip` or `.tar.gz`), return the location of the + /// built wheel. + /// + /// The location will be inside `temp_dir`, i.e. you must use the wheel before dropping the temp + /// dir. + /// + /// + #[instrument(skip(self))] + pub fn build(&self, wheel_dir: &Path) -> Result { + // The build scripts run with the extracted root as cwd, so they need the absolute path + let wheel_dir = fs::canonicalize(wheel_dir)?; + + if let Some(pep517_backend) = &self.pep517_backend { + self.pep517_build_wheel(&wheel_dir, pep517_backend) + } else { + // We checked earlier that setup.py exists + let python_interpreter = self.venv.python_interpreter(); + let output = Command::new(&python_interpreter) + .args(["setup.py", "bdist_wheel"]) + .current_dir(&self.source_tree) + .output() + .map_err(|err| Error::CommandFailed(python_interpreter, err))?; + if !output.status.success() { + return Err(Error::from_command_output( + "Failed building wheel through setup.py".to_string(), + &output, + )); + } + let dist = fs::read_dir(self.source_tree.join("dist"))?; + let dist_dir = dist.collect::>>()?; + let [dist_wheel] = dist_dir.as_slice() else { + return Err(Error::from_command_output( + format!( + "Expected exactly wheel in `dist/` after invoking setup.py, found {dist_dir:?}" + ), + &output, + )); + }; + // TODO(konstin): Faster copy such as reflink? Or maybe don't really let the user pick the target dir + let wheel = wheel_dir.join(dist_wheel.file_name()); + fs::copy(dist_wheel.path(), &wheel)?; + // TODO(konstin): Check wheel filename + Ok(wheel) + } + } + + fn pep517_build_wheel( + &self, + wheel_dir: &Path, + pep517_backend: &Pep517Backend, + ) -> Result { + let metadata_directory = self + .metadata_directory + .as_deref() + .map_or("None".to_string(), |path| { + format!(r#""{}""#, escape_path_for_python(path)) + }); + debug!( + "Calling `{}.build_wheel(metadata_directory={})`", + pep517_backend.backend, metadata_directory + ); + let escaped_wheel_dir = escape_path_for_python(wheel_dir); + let script = formatdoc! { + r#"{} as backend + print(backend.build_wheel("{}", metadata_directory={})) + "#, pep517_backend.backend_import(), escaped_wheel_dir, metadata_directory + }; + let output = + run_python_script(&self.venv.python_interpreter(), &script, &self.source_tree)?; + if !output.status.success() { + return Err(Error::from_command_output( + "Build backend failed to build wheel through `build_wheel()` ".to_string(), + &output, + )); + } + let stdout = String::from_utf8_lossy(&output.stdout); + let wheel = stdout + .lines() + .last() + .map(|wheel_filename| wheel_dir.join(wheel_filename)); + let Some(wheel) = wheel.filter(|wheel| wheel.is_file()) else { + return Err(Error::from_command_output( + "Build backend did not return the wheel filename through `build_wheel()`" + .to_string(), + &output, + )); + }; + Ok(wheel) + } +} + +fn escape_path_for_python(path: &Path) -> String { + path.to_string_lossy() + .replace('\\', "\\\\") + .replace('"', "\\\"") +} + +/// Not a method because we call it before the builder is completely initialized +fn create_pep517_build_environment( + root: &Path, + source_tree: &Path, + base_python: &Path, + data: &InterpreterInfo, + pep517_backend: &Pep517Backend, +) -> Result { + // TODO(konstin): Create bare venvs when we don't need pip anymore + let venv = gourgeist::create_venv(root.join(".venv"), base_python, data, false)?; + resolve_and_install(venv.deref().as_std_path(), &pep517_backend.requirements) + .map_err(Error::RequirementsInstall)?; + + debug!( + "Calling `{}.get_requires_for_build_wheel()`", + pep517_backend.backend + ); + let script = formatdoc! { + r#"{} as backend + import json + + if get_requires_for_build_wheel := getattr(backend, "get_requires_for_build_wheel", None): + requires = get_requires_for_build_wheel() + else: + requires = [] + print(json.dumps(requires)) + "#, pep517_backend.backend_import() + }; + let output = run_python_script(&venv.python_interpreter(), &script, source_tree)?; + if !output.status.success() { + return Err(Error::from_command_output( + "Build backend failed to determine extras requires with `get_requires_for_build_wheel`" + .to_string(), + &output, + )); + } + let extra_requires = output + .stdout + .lines() + .last() + // flatten is nightly only :/ + .transpose() + .map_err(|err| err.to_string()) + .and_then(|last_line| last_line.ok_or("Missing message".to_string())) + .and_then(|message| serde_json::from_str(&message).map_err(|err| err.to_string())); + let extra_requires: Vec = extra_requires.map_err(|err| { + Error::from_command_output( + format!( + "Build backend failed to return extras requires with \ + `get_requires_for_build_wheel`: {err}" + ), + &output, + ) + })?; + // Some packages (such as tqdm 4.66.1) list only extra requires that have already been part of + // the pyproject.toml requires (in this case, `wheel`). We can skip doing the whole resolution + // and installation again. + // TODO(konstin): Do we still need this when we have a fast resolver? + if !extra_requires.is_empty() + && !extra_requires + .iter() + .all(|req| pep517_backend.requirements.contains(req)) + { + debug!("Installing extra requirements for build backend"); + // TODO(konstin): Do we need to resolve them together? + let requirements: Vec = pep517_backend + .requirements + .iter() + .cloned() + .chain(extra_requires) + .collect(); + resolve_and_install(&*venv, &requirements).map_err(Error::RequirementsInstall)?; + } + Ok(venv) +} + #[instrument(skip_all)] fn resolve_and_install(venv: impl AsRef, requirements: &[Requirement]) -> anyhow::Result<()> { debug!("Calling pip to install build dependencies"); @@ -80,25 +422,29 @@ fn resolve_and_install(venv: impl AsRef, requirements: &[Requirement]) -> /// Returns the directory with the `pyproject.toml`/`setup.py` #[instrument(skip_all, fields(path))] -fn extract_archive(path: &Path, extracted: &PathBuf) -> Result { +fn extract_archive(sdist: &Path, extracted: &PathBuf) -> Result { // TODO(konstin): Simplify this with camino paths? - if path.extension().is_some_and(|extension| extension == "zip") { - let mut archive = ZipArchive::new(File::open(path)?)?; + if sdist + .extension() + .is_some_and(|extension| extension == "zip") + { + let mut archive = ZipArchive::new(File::open(sdist)?)?; archive.extract(extracted)?; // .tar.gz - } else if path.extension().is_some_and(|extension| extension == "gz") - && path.file_stem().is_some_and(|stem| { + } else if sdist.extension().is_some_and(|extension| extension == "gz") + && sdist.file_stem().is_some_and(|stem| { Path::new(stem) .extension() .is_some_and(|extension| extension == "tar") }) { - let mut archive = Archive::new(GzDecoder::new(File::open(path)?)); + let mut archive = Archive::new(GzDecoder::new(File::open(sdist)?)); archive.unpack(extracted)?; } else { return Err(Error::UnsupportedArchiveType( - path.file_name() - .unwrap_or(path.as_os_str()) + sdist + .file_name() + .unwrap_or(sdist.as_os_str()) .to_string_lossy() .to_string(), )); @@ -116,190 +462,15 @@ fn extract_archive(path: &Path, extracted: &PathBuf) -> Result { Ok(root.path()) } -#[instrument(skip(script, root))] +#[instrument(skip(script, source_tree))] fn run_python_script( - python_interpreter: &PathBuf, - script: &String, - root: &Path, + python_interpreter: &Path, + script: &str, + source_tree: &Path, ) -> Result { Command::new(python_interpreter) .args(["-c", script]) - .current_dir(root) + .current_dir(source_tree) .output() - .map_err(|err| Error::CommandFailed(python_interpreter.clone(), err)) -} - -/// Returns `Ok(None)` if this is not a pyproject.toml build -fn pep517_build( - wheel_dir: &Path, - root: &Path, - temp_dir: &Path, - base_python: &Path, - data: &InterpreterInfo, -) -> Result, Error> { - if !root.join("pyproject.toml").is_file() { - // We'll try setup.py instead - return Ok(None); - } - // TODO(konstin): Create bare venvs when we don't need pip anymore - let venv = gourgeist::create_venv(temp_dir.join("venv"), base_python, data, false)?; - let pyproject_toml: PyProjectToml = - toml::from_str(&fs::read_to_string(root.join("pyproject.toml"))?) - .map_err(Error::PyprojectTomlInvalid)?; - let mut requirements = pyproject_toml.build_system.requires; - resolve_and_install(venv.deref().as_std_path(), &requirements) - .map_err(Error::RequirementsInstall)?; - let Some(backend) = &pyproject_toml.build_system.build_backend else { - // > If the pyproject.toml file is absent, or the build-backend key is missing, the - // > source tree is not using this specification, and tools should revert to the legacy - // > behaviour of running setup.py (either directly, or by implicitly invoking the - // > setuptools.build_meta:__legacy__ backend). - return Ok(None); - }; - let backend_import = if let Some((path, object)) = backend.split_once(':') { - format!("from {path} import {object}") - } else { - format!("import {backend}") - }; - - debug!("Calling `{}.get_requires_for_build_wheel()`", backend); - let script = formatdoc! { - r#"{} as backend - import json - - if get_requires_for_build_wheel := getattr(backend, "get_requires_for_build_wheel", None): - requires = get_requires_for_build_wheel() - else: - requires = [] - print(json.dumps(requires)) - "#, backend_import - }; - let python_interpreter = venv.python_interpreter(); - let output = run_python_script(&python_interpreter, &script, root)?; - if !output.status.success() { - return Err(Error::from_command_output( - "Build backend failed to determine extras requires with `get_requires_for_build_wheel`" - .to_string(), - &output, - )); - } - let stdout = String::from_utf8_lossy(&output.stdout); - - let extra_requires: Vec = - serde_json::from_str(stdout.lines().last().unwrap_or_default()).map_err(|err| { - Error::from_command_output( - format!( - "Build backend failed to return extras requires with \ - `get_requires_for_build_wheel`: {err}" - ), - &output, - ) - })?; - // Some packages (such as tqdm 4.66.1) list only extra requires that have already been part of - // the pyproject.toml requires (in this case, `wheel`). We can skip doing the whole resolution - // and installation again. - // TODO(konstin): Do we still need this when we have a fast resolver? - if !extra_requires.is_empty() && !extra_requires.iter().all(|req| requirements.contains(req)) { - debug!("Installing extra requirements for build backend"); - // TODO(konstin): Do we need to resolve them together? - requirements.extend(extra_requires); - resolve_and_install(&*venv, &requirements).map_err(Error::RequirementsInstall)?; - } - - debug!("Calling `{}.build_wheel()`", backend); - let escaped_wheel_dir = wheel_dir - .display() - .to_string() - .replace('\\', "\\\\") - .replace('"', "\\\""); - let script = formatdoc! { - r#"{} as backend - print(backend.build_wheel("{}")) - "#, backend_import, escaped_wheel_dir - }; - let output = run_python_script(&python_interpreter, &script, root)?; - if !output.status.success() { - return Err(Error::from_command_output( - "Build backend failed to build wheel through `build_wheel()` ".to_string(), - &output, - )); - } - let stdout = String::from_utf8_lossy(&output.stdout); - let wheel = stdout - .lines() - .last() - .map(|wheel_filename| wheel_dir.join(wheel_filename)); - let Some(wheel) = wheel.filter(|wheel| wheel.is_file()) else { - return Err(Error::from_command_output( - "Build backend did not return the wheel filename through `build_wheel()`".to_string(), - &output, - )); - }; - Ok(Some(wheel)) -} - -/// Build a source distribution from an archive (`.zip` or `.tar.gz`), return the location of the -/// built wheel. -/// -/// The location will be inside `temp_dir`, i.e. you must use the wheel before dropping the temp -/// dir. -/// -/// -#[instrument(skip(wheel_dir, interpreter_info))] -pub fn build_sdist( - path: &Path, - wheel_dir: &Path, - base_python: &Path, - interpreter_info: &InterpreterInfo, -) -> Result { - debug!("Building {}", path.display()); - // TODO(konstin): Parse and verify filenames - let temp_dir = tempdir()?; - let temp_dir = temp_dir.path(); - // The build scripts run with the extracted root as cwd, so they need the absolute path - let wheel_dir = fs::canonicalize(wheel_dir)?; - - let extracted = temp_dir.join("extracted"); - let root = extract_archive(path, &extracted)?; - - let wheel = pep517_build(&wheel_dir, &root, temp_dir, base_python, interpreter_info)?; - - if let Some(wheel) = wheel { - Ok(wheel) - } else if root.join("setup.py").is_file() { - let venv = - gourgeist::create_venv(temp_dir.join("venv"), base_python, interpreter_info, false)?; - let python_interpreter = venv.python_interpreter(); - let output = Command::new(&python_interpreter) - .args(["setup.py", "bdist_wheel"]) - .current_dir(&root) - .output() - .map_err(|err| Error::CommandFailed(python_interpreter.clone(), err))?; - if !output.status.success() { - return Err(Error::from_command_output( - "Failed building wheel through setup.py".to_string(), - &output, - )); - } - let dist = fs::read_dir(root.join("dist"))?; - let dist_dir = dist.collect::>>()?; - let [dist_wheel] = dist_dir.as_slice() else { - return Err(Error::from_command_output( - format!( - "Expected exactly wheel in `dist/` after invoking setup.py, found {dist_dir:?}" - ), - &output, - )); - }; - // TODO(konstin): Faster copy such as reflink? Or maybe don't really let the user pick the target dir - let wheel = wheel_dir.join(dist_wheel.file_name()); - fs::copy(dist_wheel.path(), &wheel)?; - // TODO(konstin): Check wheel filename - Ok(wheel) - } else { - Err(Error::InvalidSourceDistribution( - "The archive contains neither a pyproject.toml or a setup.py at the top level" - .to_string(), - )) - } + .map_err(|err| Error::CommandFailed(python_interpreter.to_path_buf(), err)) } diff --git a/crates/puffin-build/src/main.rs b/crates/puffin-build/src/main.rs index 9d8821622..59cec8700 100644 --- a/crates/puffin-build/src/main.rs +++ b/crates/puffin-build/src/main.rs @@ -4,7 +4,7 @@ use anyhow::Context; use clap::Parser; use colored::Colorize; use fs_err as fs; -use puffin_build::{build_sdist, Error}; +use puffin_build::{Error, SourceDistributionBuilder}; use std::path::PathBuf; use std::process::ExitCode; use std::time::Instant; @@ -45,7 +45,8 @@ fn run() -> anyhow::Result<()> { })?; let interpreter_info = gourgeist::get_interpreter_info(&base_python)?; - let wheel = build_sdist(&args.sdist, &wheel_dir, &base_python, &interpreter_info)?; + let builder = SourceDistributionBuilder::setup(&args.sdist, &base_python, &interpreter_info)?; + let wheel = builder.build(&wheel_dir)?; println!("Wheel built to {}", wheel.display()); Ok(()) } diff --git a/crates/puffin-build/test.sh b/crates/puffin-build/test.sh index 252156225..aa96e0971 100644 --- a/crates/puffin-build/test.sh +++ b/crates/puffin-build/test.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +set -e + mkdir -p downloads if [ ! -f downloads/tqdm-4.66.1.tar.gz ]; then wget https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz -O downloads/tqdm-4.66.1.tar.gz @@ -7,6 +9,7 @@ fi if [ ! -f downloads/geoextract-0.3.1.tar.gz ]; then wget https://files.pythonhosted.org/packages/c4/00/9d9826a6e1c9139cc7183647f47f6b7acb290fa4c572140aa84a12728e60/geoextract-0.3.1.tar.gz -O downloads/geoextract-0.3.1.tar.gz fi +rm -rf wheels RUST_LOG=puffin_build=debug cargo run -p puffin-build --bin puffin-build -- --wheels wheels downloads/tqdm-4.66.1.tar.gz RUST_LOG=puffin_build=debug cargo run -p puffin-build --bin puffin-build -- --wheels wheels downloads/geoextract-0.3.1.tar.gz