Support prepare_metadata_for_build_wheel (#106)

Support calling `prepare_metadata_for_build_wheel`, which can give you
the metadata without executing the actual build if the backend supports
it.

This makes the code a lot uglier since we effectively have a state
machine:

* Setup: Either venv plus requires (PEP 517) or just a venv (setup.py)
* Get metadata (optional step): nothing (setup.py) or `prepare_metadata_for_build_wheel`, saving that result
* Build: `setup.py`, `build_wheel()` or `build_wheel(metadata_directory=metadata_directory)`. I think I got the general flow right; a sketch of the hook call order follows below.
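
To make the flow concrete, here is a minimal sketch of the PEP 517 call order from the frontend's point of view, using a hypothetical in-process stub backend (real backends such as setuptools or maturin live in the build venv and are driven via generated scripts, as in the diff below):

```python
"""Sketch of the PEP 517 call order this PR implements; StubBackend is hypothetical."""
import tempfile
from pathlib import Path


class StubBackend:
    """Stands in for e.g. setuptools.build_meta, only to show the call order."""

    def get_requires_for_build_wheel(self, config_settings=None):
        return []  # extra build requirements on top of `build-system.requires`

    def prepare_metadata_for_build_wheel(self, metadata_directory, config_settings=None):
        dist_info = Path(metadata_directory) / "stub-1.0.dist-info"
        dist_info.mkdir()
        (dist_info / "METADATA").write_text("Metadata-Version: 2.1\nName: stub\nVersion: 1.0\n")
        return dist_info.name  # the hook returns only the directory name

    def build_wheel(self, wheel_directory, config_settings=None, metadata_directory=None):
        # When metadata_directory is given, the wheel must reuse that metadata.
        wheel = Path(wheel_directory) / "stub-1.0-py3-none-any.whl"
        wheel.write_bytes(b"")  # a real backend writes an actual wheel here
        return wheel.name


backend = StubBackend()
with tempfile.TemporaryDirectory() as tmp:
    # 1. Setup: create a venv, install requires + get_requires_for_build_wheel()
    extra_requires = backend.get_requires_for_build_wheel()
    # 2. Get metadata (optional): prepare_metadata_for_build_wheel, remember the dist-info dir
    metadata_directory = Path(tmp) / "metadata_directory"
    metadata_directory.mkdir()
    dist_info = metadata_directory / backend.prepare_metadata_for_build_wheel(metadata_directory)
    # 3. Build: pass the dist-info dir back so the wheel's metadata matches step 2
    wheel = backend.build_wheel(tmp, metadata_directory=str(dist_info))
    print(dist_info.name, wheel)
```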

@charliermarsh This is a "barely works but unblocks building on top" implementation; say if you want more polish (I'll look at this again tomorrow).
konsti 2023-10-18 14:48:30 +02:00 committed by GitHub
parent 4c87a1d42c
commit fec4ee2848
3 changed files with 368 additions and 193 deletions


@@ -11,11 +11,12 @@ use indoc::formatdoc;
use pep508_rs::Requirement;
use pyproject_toml::PyProjectToml;
use std::io;
use std::io::BufRead;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use tar::Archive;
use tempfile::tempdir;
use tempfile::TempDir;
use thiserror::Error;
use tracing::{debug, instrument};
use zip::ZipArchive;
@@ -31,7 +32,7 @@ pub enum Error {
#[error("Invalid source distribution: {0}")]
InvalidSourceDistribution(String),
#[error("Invalid pyproject.toml")]
PyprojectTomlInvalid(#[from] toml::de::Error),
InvalidPyprojectToml(#[from] toml::de::Error),
#[error("Failed to install requirements")]
RequirementsInstall(#[source] anyhow::Error),
#[error("Failed to create temporary virtual environment")]
@@ -56,6 +57,347 @@ impl Error {
}
}
/// `[build-system]` from pyproject.toml
struct Pep517Backend {
/// The build backend string such as `setuptools.build_meta:__legacy__` or `maturin` from
/// `build-system.build-backend` in pyproject.toml
backend: String,
/// `build-system.requires` in pyproject.toml
requirements: Vec<Requirement>,
}
impl Pep517Backend {
fn backend_import(&self) -> String {
if let Some((path, object)) = self.backend.split_once(':') {
format!("from {path} import {object}")
} else {
format!("import {}", self.backend)
}
}
}
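
The generated scripts below splice the result in as `{} as backend`, so the two backend string forms end up as import lines like these (assuming the respective backend package is installed in the build venv):

```python
# backend = "setuptools.build_meta:__legacy__"
from setuptools.build_meta import __legacy__ as backend

# backend = "maturin"
import maturin as backend
```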
/// Holds the state through a series of PEP 517 frontend to backend calls or a single setup.py
/// invocation.
///
/// This keeps both the temp dir and the result of a potential `prepare_metadata_for_build_wheel`
/// call which changes how we call `build_wheel`.
pub struct SourceDistributionBuilder {
temp_dir: TempDir,
source_tree: PathBuf,
/// `Some` if this is a PEP 517 build
pep517_backend: Option<Pep517Backend>,
venv: Venv,
/// Populated if `prepare_metadata_for_build_wheel` was called.
///
/// > If the build frontend has previously called prepare_metadata_for_build_wheel and depends
/// > on the wheel resulting from this call to have metadata matching this earlier call, then
/// > it should provide the path to the created .dist-info directory as the metadata_directory
/// > argument. If this argument is provided, then build_wheel MUST produce a wheel with
/// > identical metadata. The directory passed in by the build frontend MUST be identical to the
/// > directory created by prepare_metadata_for_build_wheel, including any unrecognized files
/// > it created.
metadata_directory: Option<PathBuf>,
}
impl SourceDistributionBuilder {
/// Extract the source distribution and create a venv with the required packages
pub fn setup(
sdist: &Path,
base_python: &Path,
interpreter_info: &InterpreterInfo,
) -> Result<SourceDistributionBuilder, Error> {
let temp_dir = TempDir::new()?;
// TODO(konstin): Parse and verify filenames
debug!("Unpacking for build {}", sdist.display());
let extracted = temp_dir.path().join("extracted");
let source_tree = extract_archive(sdist, &extracted)?;
// Check if we have a PEP 517 build, otherwise we'll fall back to setup.py
let mut pep517 = None;
if source_tree.join("pyproject.toml").is_file() {
let pyproject_toml: PyProjectToml =
toml::from_str(&fs::read_to_string(source_tree.join("pyproject.toml"))?)
.map_err(Error::InvalidPyprojectToml)?;
// > If the pyproject.toml file is absent, or the build-backend key is missing, the
// > source tree is not using this specification, and tools should revert to the legacy
// > behaviour of running setup.py (either directly, or by implicitly invoking the
// > setuptools.build_meta:__legacy__ backend).
if let Some(backend) = pyproject_toml.build_system.build_backend {
pep517 = Some(Pep517Backend {
backend,
requirements: pyproject_toml.build_system.requires,
});
};
if pyproject_toml.build_system.backend_path.is_some() {
todo!("backend-path is not supported yet")
}
}
let venv = if let Some(pep517_backend) = &pep517 {
create_pep517_build_environment(
temp_dir.path(),
&source_tree,
base_python,
interpreter_info,
pep517_backend,
)?
} else {
if !source_tree.join("setup.py").is_file() {
return Err(Error::InvalidSourceDistribution(
"The archive contains neither a pyproject.toml or a setup.py at the top level"
.to_string(),
));
}
gourgeist::create_venv(
temp_dir.path().join("venv"),
base_python,
interpreter_info,
false,
)?
};
Ok(Self {
temp_dir,
source_tree,
pep517_backend: pep517,
venv,
metadata_directory: None,
})
}
/// Try calling `prepare_metadata_for_build_wheel` to get the metadata without executing the
/// actual build
///
/// TODO(konstin): Return the actual metadata instead of the dist-info dir
pub fn get_metadata_without_build(&mut self) -> Result<Option<&Path>, Error> {
// setup.py builds don't support this
let Some(pep517_backend) = &self.pep517_backend else {
return Ok(None);
};
let metadata_directory = self.temp_dir.path().join("metadata_directory");
fs::create_dir(&metadata_directory)?;
debug!(
"Calling `{}.prepare_metadata_for_build_wheel()`",
pep517_backend.backend
);
let script = formatdoc! {
r#"{} as backend
if prepare_metadata_for_build_wheel := getattr(backend, "prepare_metadata_for_build_wheel", None):
print(prepare_metadata_for_build_wheel("{}"))
else:
print()
"#, pep517_backend.backend_import(), escape_path_for_python(&metadata_directory)
};
let output =
run_python_script(&self.venv.python_interpreter(), &script, &self.source_tree)?;
if !output.status.success() {
return Err(Error::from_command_output(
"Build backend failed to determine metadata through `prepare_metadata_for_build_wheel`".to_string(),
&output,
));
}
let message = output
.stdout
.lines()
.last()
// flatten is nightly only :/
.transpose()
.map_err(|err| err.to_string())
.and_then(|last_line| last_line.ok_or("Missing message".to_string()))
.map_err(|err| {
Error::from_command_output(
format!(
"Build backend failed to return metadata directory with \
`prepare_metadata_for_build_wheel`: {err}"
),
&output,
)
})?;
if message.is_empty() {
return Ok(None);
}
self.metadata_directory = Some(metadata_directory.join(message));
return Ok(self.metadata_directory.as_deref());
}
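
The last line of stdout is either empty (the backend doesn't implement the hook) or the name of the freshly created `.dist-info` directory. With a hypothetical `flit_core.buildapi` backend and `/tmp/build/metadata_directory` as the target directory, the generated probe script would look roughly like this:

```python
# Roughly what run_python_script executes for the metadata step
# (backend and path are illustrative, not taken from this PR).
import flit_core.buildapi as backend

if prepare_metadata_for_build_wheel := getattr(backend, "prepare_metadata_for_build_wheel", None):
    print(prepare_metadata_for_build_wheel("/tmp/build/metadata_directory"))
else:
    print()
```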
/// Build a source distribution from an archive (`.zip` or `.tar.gz`), return the location of the
/// built wheel.
///
/// The location will be inside `temp_dir`, i.e. you must use the wheel before dropping the temp
/// dir.
///
/// <https://packaging.python.org/en/latest/specifications/source-distribution-format/>
#[instrument(skip(self))]
pub fn build(&self, wheel_dir: &Path) -> Result<PathBuf, Error> {
// The build scripts run with the extracted root as cwd, so they need the absolute path
let wheel_dir = fs::canonicalize(wheel_dir)?;
if let Some(pep517_backend) = &self.pep517_backend {
self.pep517_build_wheel(&wheel_dir, pep517_backend)
} else {
// We checked earlier that setup.py exists
let python_interpreter = self.venv.python_interpreter();
let output = Command::new(&python_interpreter)
.args(["setup.py", "bdist_wheel"])
.current_dir(&self.source_tree)
.output()
.map_err(|err| Error::CommandFailed(python_interpreter, err))?;
if !output.status.success() {
return Err(Error::from_command_output(
"Failed building wheel through setup.py".to_string(),
&output,
));
}
let dist = fs::read_dir(self.source_tree.join("dist"))?;
let dist_dir = dist.collect::<io::Result<Vec<DirEntry>>>()?;
let [dist_wheel] = dist_dir.as_slice() else {
return Err(Error::from_command_output(
format!(
"Expected exactly wheel in `dist/` after invoking setup.py, found {dist_dir:?}"
),
&output,
));
};
// TODO(konstin): Faster copy such as reflink? Or maybe don't really let the user pick the target dir
let wheel = wheel_dir.join(dist_wheel.file_name());
fs::copy(dist_wheel.path(), &wheel)?;
// TODO(konstin): Check wheel filename
Ok(wheel)
}
}
fn pep517_build_wheel(
&self,
wheel_dir: &Path,
pep517_backend: &Pep517Backend,
) -> Result<PathBuf, Error> {
let metadata_directory = self
.metadata_directory
.as_deref()
.map_or("None".to_string(), |path| {
format!(r#""{}""#, escape_path_for_python(path))
});
debug!(
"Calling `{}.build_wheel(metadata_directory={})`",
pep517_backend.backend, metadata_directory
);
let escaped_wheel_dir = escape_path_for_python(wheel_dir);
let script = formatdoc! {
r#"{} as backend
print(backend.build_wheel("{}", metadata_directory={}))
"#, pep517_backend.backend_import(), escaped_wheel_dir, metadata_directory
};
let output =
run_python_script(&self.venv.python_interpreter(), &script, &self.source_tree)?;
if !output.status.success() {
return Err(Error::from_command_output(
"Build backend failed to build wheel through `build_wheel()` ".to_string(),
&output,
));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let wheel = stdout
.lines()
.last()
.map(|wheel_filename| wheel_dir.join(wheel_filename));
let Some(wheel) = wheel.filter(|wheel| wheel.is_file()) else {
return Err(Error::from_command_output(
"Build backend did not return the wheel filename through `build_wheel()`"
.to_string(),
&output,
));
};
Ok(wheel)
}
}
fn escape_path_for_python(path: &Path) -> String {
path.to_string_lossy()
.replace('\\', "\\\\")
.replace('"', "\\\"")
}
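
Since the path gets spliced between double quotes inside generated Python source, backslashes and quotes need doubling; a quick round-trip check using an equivalent Python re-implementation (illustrative, not part of this crate):

```python
def escape_path_for_python(path: str) -> str:
    # Same two replacements as the Rust helper above.
    return path.replace("\\", "\\\\").replace('"', '\\"')

path = r'C:\temp\weird "name"'
script = f'print("{escape_path_for_python(path)}")'
exec(script)  # prints: C:\temp\weird "name"
```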
/// Not a method because we call it before the builder is completely initialized
fn create_pep517_build_environment(
root: &Path,
source_tree: &Path,
base_python: &Path,
data: &InterpreterInfo,
pep517_backend: &Pep517Backend,
) -> Result<Venv, Error> {
// TODO(konstin): Create bare venvs when we don't need pip anymore
let venv = gourgeist::create_venv(root.join(".venv"), base_python, data, false)?;
resolve_and_install(venv.deref().as_std_path(), &pep517_backend.requirements)
.map_err(Error::RequirementsInstall)?;
debug!(
"Calling `{}.get_requires_for_build_wheel()`",
pep517_backend.backend
);
let script = formatdoc! {
r#"{} as backend
import json
if get_requires_for_build_wheel := getattr(backend, "get_requires_for_build_wheel", None):
requires = get_requires_for_build_wheel()
else:
requires = []
print(json.dumps(requires))
"#, pep517_backend.backend_import()
};
let output = run_python_script(&venv.python_interpreter(), &script, source_tree)?;
if !output.status.success() {
return Err(Error::from_command_output(
"Build backend failed to determine extras requires with `get_requires_for_build_wheel`"
.to_string(),
&output,
));
}
let extra_requires = output
.stdout
.lines()
.last()
// flatten is nightly only :/
.transpose()
.map_err(|err| err.to_string())
.and_then(|last_line| last_line.ok_or("Missing message".to_string()))
.and_then(|message| serde_json::from_str(&message).map_err(|err| err.to_string()));
let extra_requires: Vec<Requirement> = extra_requires.map_err(|err| {
Error::from_command_output(
format!(
"Build backend failed to return extras requires with \
`get_requires_for_build_wheel`: {err}"
),
&output,
)
})?;
// Some packages (such as tqdm 4.66.1) only list extra requires that are already part of the
// pyproject.toml requires (in this case, `wheel`), so we can skip doing the whole resolution
// and installation again.
// TODO(konstin): Do we still need this when we have a fast resolver?
if !extra_requires.is_empty()
&& !extra_requires
.iter()
.all(|req| pep517_backend.requirements.contains(req))
{
debug!("Installing extra requirements for build backend");
// TODO(konstin): Do we need to resolve them together?
let requirements: Vec<Requirement> = pep517_backend
.requirements
.iter()
.cloned()
.chain(extra_requires)
.collect();
resolve_and_install(&*venv, &requirements).map_err(Error::RequirementsInstall)?;
}
Ok(venv)
}
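
For reference, the backend side of `get_requires_for_build_wheel` just returns a list of PEP 508 requirement strings, which the probe script above serializes as JSON on the last stdout line. A backend-side sketch with illustrative values (older setuptools, for instance, reports `["wheel"]`, which is what the tqdm comment above refers to):

```python
import json

# Illustrative backend-side hook; real backends compute this from their configuration.
def get_requires_for_build_wheel(config_settings=None):
    return ["wheel", "cython"]

# What the generated script prints and the Rust side parses with serde_json:
print(json.dumps(get_requires_for_build_wheel()))  # ["wheel", "cython"]
```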
#[instrument(skip_all)]
fn resolve_and_install(venv: impl AsRef<Path>, requirements: &[Requirement]) -> anyhow::Result<()> {
debug!("Calling pip to install build dependencies");
@@ -80,25 +422,29 @@ fn resolve_and_install(venv: impl AsRef<Path>, requirements: &[Requirement]) ->
/// Returns the directory with the `pyproject.toml`/`setup.py`
#[instrument(skip_all, fields(path))]
fn extract_archive(path: &Path, extracted: &PathBuf) -> Result<PathBuf, Error> {
fn extract_archive(sdist: &Path, extracted: &PathBuf) -> Result<PathBuf, Error> {
// TODO(konstin): Simplify this with camino paths?
if path.extension().is_some_and(|extension| extension == "zip") {
let mut archive = ZipArchive::new(File::open(path)?)?;
if sdist
.extension()
.is_some_and(|extension| extension == "zip")
{
let mut archive = ZipArchive::new(File::open(sdist)?)?;
archive.extract(extracted)?;
// .tar.gz
} else if path.extension().is_some_and(|extension| extension == "gz")
&& path.file_stem().is_some_and(|stem| {
} else if sdist.extension().is_some_and(|extension| extension == "gz")
&& sdist.file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|extension| extension == "tar")
})
{
let mut archive = Archive::new(GzDecoder::new(File::open(path)?));
let mut archive = Archive::new(GzDecoder::new(File::open(sdist)?));
archive.unpack(extracted)?;
} else {
return Err(Error::UnsupportedArchiveType(
path.file_name()
.unwrap_or(path.as_os_str())
sdist
.file_name()
.unwrap_or(sdist.as_os_str())
.to_string_lossy()
.to_string(),
));
@@ -116,190 +462,15 @@ fn extract_archive(path: &Path, extracted: &PathBuf) -> Result<PathBuf, Error> {
Ok(root.path())
}
#[instrument(skip(script, root))]
#[instrument(skip(script, source_tree))]
fn run_python_script(
python_interpreter: &PathBuf,
script: &String,
root: &Path,
python_interpreter: &Path,
script: &str,
source_tree: &Path,
) -> Result<Output, Error> {
Command::new(python_interpreter)
.args(["-c", script])
.current_dir(root)
.current_dir(source_tree)
.output()
.map_err(|err| Error::CommandFailed(python_interpreter.clone(), err))
}
/// Returns `Ok(None)` if this is not a pyproject.toml build
fn pep517_build(
wheel_dir: &Path,
root: &Path,
temp_dir: &Path,
base_python: &Path,
data: &InterpreterInfo,
) -> Result<Option<PathBuf>, Error> {
if !root.join("pyproject.toml").is_file() {
// We'll try setup.py instead
return Ok(None);
}
// TODO(konstin): Create bare venvs when we don't need pip anymore
let venv = gourgeist::create_venv(temp_dir.join("venv"), base_python, data, false)?;
let pyproject_toml: PyProjectToml =
toml::from_str(&fs::read_to_string(root.join("pyproject.toml"))?)
.map_err(Error::PyprojectTomlInvalid)?;
let mut requirements = pyproject_toml.build_system.requires;
resolve_and_install(venv.deref().as_std_path(), &requirements)
.map_err(Error::RequirementsInstall)?;
let Some(backend) = &pyproject_toml.build_system.build_backend else {
// > If the pyproject.toml file is absent, or the build-backend key is missing, the
// > source tree is not using this specification, and tools should revert to the legacy
// > behaviour of running setup.py (either directly, or by implicitly invoking the
// > setuptools.build_meta:__legacy__ backend).
return Ok(None);
};
let backend_import = if let Some((path, object)) = backend.split_once(':') {
format!("from {path} import {object}")
} else {
format!("import {backend}")
};
debug!("Calling `{}.get_requires_for_build_wheel()`", backend);
let script = formatdoc! {
r#"{} as backend
import json
if get_requires_for_build_wheel := getattr(backend, "get_requires_for_build_wheel", None):
requires = get_requires_for_build_wheel()
else:
requires = []
print(json.dumps(requires))
"#, backend_import
};
let python_interpreter = venv.python_interpreter();
let output = run_python_script(&python_interpreter, &script, root)?;
if !output.status.success() {
return Err(Error::from_command_output(
"Build backend failed to determine extras requires with `get_requires_for_build_wheel`"
.to_string(),
&output,
));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let extra_requires: Vec<Requirement> =
serde_json::from_str(stdout.lines().last().unwrap_or_default()).map_err(|err| {
Error::from_command_output(
format!(
"Build backend failed to return extras requires with \
`get_requires_for_build_wheel`: {err}"
),
&output,
)
})?;
// Some packages (such as tqdm 4.66.1) list only extra requires that have already been part of
// the pyproject.toml requires (in this case, `wheel`). We can skip doing the whole resolution
// and installation again.
// TODO(konstin): Do we still need this when we have a fast resolver?
if !extra_requires.is_empty() && !extra_requires.iter().all(|req| requirements.contains(req)) {
debug!("Installing extra requirements for build backend");
// TODO(konstin): Do we need to resolve them together?
requirements.extend(extra_requires);
resolve_and_install(&*venv, &requirements).map_err(Error::RequirementsInstall)?;
}
debug!("Calling `{}.build_wheel()`", backend);
let escaped_wheel_dir = wheel_dir
.display()
.to_string()
.replace('\\', "\\\\")
.replace('"', "\\\"");
let script = formatdoc! {
r#"{} as backend
print(backend.build_wheel("{}"))
"#, backend_import, escaped_wheel_dir
};
let output = run_python_script(&python_interpreter, &script, root)?;
if !output.status.success() {
return Err(Error::from_command_output(
"Build backend failed to build wheel through `build_wheel()` ".to_string(),
&output,
));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let wheel = stdout
.lines()
.last()
.map(|wheel_filename| wheel_dir.join(wheel_filename));
let Some(wheel) = wheel.filter(|wheel| wheel.is_file()) else {
return Err(Error::from_command_output(
"Build backend did not return the wheel filename through `build_wheel()`".to_string(),
&output,
));
};
Ok(Some(wheel))
}
/// Build a source distribution from an archive (`.zip` or `.tar.gz`), return the location of the
/// built wheel.
///
/// The location will be inside `temp_dir`, i.e. you must use the wheel before dropping the temp
/// dir.
///
/// <https://packaging.python.org/en/latest/specifications/source-distribution-format/>
#[instrument(skip(wheel_dir, interpreter_info))]
pub fn build_sdist(
path: &Path,
wheel_dir: &Path,
base_python: &Path,
interpreter_info: &InterpreterInfo,
) -> Result<PathBuf, Error> {
debug!("Building {}", path.display());
// TODO(konstin): Parse and verify filenames
let temp_dir = tempdir()?;
let temp_dir = temp_dir.path();
// The build scripts run with the extracted root as cwd, so they need the absolute path
let wheel_dir = fs::canonicalize(wheel_dir)?;
let extracted = temp_dir.join("extracted");
let root = extract_archive(path, &extracted)?;
let wheel = pep517_build(&wheel_dir, &root, temp_dir, base_python, interpreter_info)?;
if let Some(wheel) = wheel {
Ok(wheel)
} else if root.join("setup.py").is_file() {
let venv =
gourgeist::create_venv(temp_dir.join("venv"), base_python, interpreter_info, false)?;
let python_interpreter = venv.python_interpreter();
let output = Command::new(&python_interpreter)
.args(["setup.py", "bdist_wheel"])
.current_dir(&root)
.output()
.map_err(|err| Error::CommandFailed(python_interpreter.clone(), err))?;
if !output.status.success() {
return Err(Error::from_command_output(
"Failed building wheel through setup.py".to_string(),
&output,
));
}
let dist = fs::read_dir(root.join("dist"))?;
let dist_dir = dist.collect::<io::Result<Vec<DirEntry>>>()?;
let [dist_wheel] = dist_dir.as_slice() else {
return Err(Error::from_command_output(
format!(
"Expected exactly wheel in `dist/` after invoking setup.py, found {dist_dir:?}"
),
&output,
));
};
// TODO(konstin): Faster copy such as reflink? Or maybe don't really let the user pick the target dir
let wheel = wheel_dir.join(dist_wheel.file_name());
fs::copy(dist_wheel.path(), &wheel)?;
// TODO(konstin): Check wheel filename
Ok(wheel)
} else {
Err(Error::InvalidSourceDistribution(
"The archive contains neither a pyproject.toml or a setup.py at the top level"
.to_string(),
))
}
.map_err(|err| Error::CommandFailed(python_interpreter.to_path_buf(), err))
}


@@ -4,7 +4,7 @@ use anyhow::Context;
use clap::Parser;
use colored::Colorize;
use fs_err as fs;
use puffin_build::{build_sdist, Error};
use puffin_build::{Error, SourceDistributionBuilder};
use std::path::PathBuf;
use std::process::ExitCode;
use std::time::Instant;
@@ -45,7 +45,8 @@ fn run() -> anyhow::Result<()> {
})?;
let interpreter_info = gourgeist::get_interpreter_info(&base_python)?;
let wheel = build_sdist(&args.sdist, &wheel_dir, &base_python, &interpreter_info)?;
let builder = SourceDistributionBuilder::setup(&args.sdist, &base_python, &interpreter_info)?;
let wheel = builder.build(&wheel_dir)?;
println!("Wheel built to {}", wheel.display());
Ok(())
}


@@ -1,5 +1,7 @@
#!/usr/bin/env bash
set -e
mkdir -p downloads
if [ ! -f downloads/tqdm-4.66.1.tar.gz ]; then
wget https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz -O downloads/tqdm-4.66.1.tar.gz
@@ -7,6 +9,7 @@ fi
if [ ! -f downloads/geoextract-0.3.1.tar.gz ]; then
wget https://files.pythonhosted.org/packages/c4/00/9d9826a6e1c9139cc7183647f47f6b7acb290fa4c572140aa84a12728e60/geoextract-0.3.1.tar.gz -O downloads/geoextract-0.3.1.tar.gz
fi
rm -rf wheels
RUST_LOG=puffin_build=debug cargo run -p puffin-build --bin puffin-build -- --wheels wheels downloads/tqdm-4.66.1.tar.gz
RUST_LOG=puffin_build=debug cargo run -p puffin-build --bin puffin-build -- --wheels wheels downloads/geoextract-0.3.1.tar.gz