Install source distribution requirements with puffin itself instead of pip (#122)

This is also a lot faster. Unfortunately, it copies a lot of code from
the sync CLI, since the `Printer` is private.

The first commit contains some refactorings I made while thinking about
how I could reuse the existing code.
This commit is contained in:
konsti 2023-10-18 21:11:17 +02:00 committed by GitHub
parent 7bc42ca2ce
commit 8cc4fe0d44
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 306 additions and 193 deletions

View file

@ -11,14 +11,24 @@ authors = { workspace = true }
license = { workspace = true }
[dependencies]
gourgeist = { version = "0.0.4", path = "../gourgeist" }
pep508_rs = { version = "0.2.3", path = "../pep508-rs" }
gourgeist = { path = "../gourgeist" }
pep508_rs = { path = "../pep508-rs" }
platform-host = { path = "../platform-host" }
platform-tags = { path = "../platform-tags" }
puffin-client = { path = "../puffin-client" }
puffin-installer = { path = "../puffin-installer" }
puffin-interpreter = { path = "../puffin-interpreter" }
puffin-package = { path = "../puffin-package" }
puffin-resolver = { path = "../puffin-resolver" }
puffin-workspace = { path = "../puffin-workspace" }
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
directories = { workspace = true }
flate2 = { workspace = true }
fs-err = { workspace = true }
indoc = { workspace = true }
itertools = { workspace = true }
owo-colors = { workspace = true }
pyproject-toml = { workspace = true }
serde = { workspace = true }
@ -26,6 +36,7 @@ serde_json = { workspace = true }
tar = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }

View file

@ -8,15 +8,24 @@ use fs_err as fs;
use fs_err::{DirEntry, File};
use gourgeist::{InterpreterInfo, Venv};
use indoc::formatdoc;
use itertools::{Either, Itertools};
use pep508_rs::Requirement;
use platform_host::Platform;
use platform_tags::Tags;
use puffin_client::PypiClientBuilder;
use puffin_installer::{Downloader, LocalDistribution, LocalIndex, RemoteDistribution, Unzipper};
use puffin_interpreter::PythonExecutable;
use puffin_package::package_name::PackageName;
use puffin_resolver::WheelFinder;
use pyproject_toml::PyProjectToml;
use std::io;
use std::io::BufRead;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use std::str::FromStr;
use tar::Archive;
use tempfile::TempDir;
use tempfile::{tempdir, TempDir};
use thiserror::Error;
use tracing::{debug, instrument};
use zip::ZipArchive;
@ -101,12 +110,13 @@ pub struct SourceDistributionBuilder {
impl SourceDistributionBuilder {
/// Extract the source distribution and create a venv with the required packages
pub fn setup(
pub async fn setup(
sdist: &Path,
base_python: &Path,
interpreter_info: &InterpreterInfo,
cache: Option<&Path>,
) -> Result<SourceDistributionBuilder, Error> {
let temp_dir = TempDir::new()?;
let temp_dir = tempdir()?;
// TODO(konstin): Parse and verify filenames
debug!("Unpacking for build {}", sdist.display());
@ -141,7 +151,9 @@ impl SourceDistributionBuilder {
base_python,
interpreter_info,
pep517_backend,
)?
cache,
)
.await?
} else {
if !source_tree.join("setup.py").is_file() {
return Err(Error::InvalidSourceDistribution(
@ -149,12 +161,22 @@ impl SourceDistributionBuilder {
.to_string(),
));
}
gourgeist::create_venv(
let venv = gourgeist::create_venv(
temp_dir.path().join("venv"),
base_python,
interpreter_info,
false,
)?
true,
)?;
// TODO: Resolve those once globally and cache per puffin invocation
let requirements = [
Requirement::from_str("wheel").unwrap(),
Requirement::from_str("setuptools").unwrap(),
Requirement::from_str("pip").unwrap(),
];
resolve_and_install(venv.as_std_path(), &requirements, cache)
.await
.map_err(Error::RequirementsInstall)?;
venv
};
Ok(Self {
@ -323,17 +345,22 @@ fn escape_path_for_python(path: &Path) -> String {
}
/// Not a method because we call it before the builder is completely initialized
fn create_pep517_build_environment(
async fn create_pep517_build_environment(
root: &Path,
source_tree: &Path,
base_python: &Path,
data: &InterpreterInfo,
pep517_backend: &Pep517Backend,
cache: Option<&Path>,
) -> Result<Venv, Error> {
// TODO(konstin): Create bare venvs when we don't need pip anymore
let venv = gourgeist::create_venv(root.join(".venv"), base_python, data, false)?;
resolve_and_install(venv.deref().as_std_path(), &pep517_backend.requirements)
.map_err(Error::RequirementsInstall)?;
let venv = gourgeist::create_venv(root.join(".venv"), base_python, data, true)?;
resolve_and_install(
venv.deref().as_std_path(),
&pep517_backend.requirements,
cache,
)
.await
.map_err(Error::RequirementsInstall)?;
debug!(
"Calling `{}.get_requires_for_build_wheel()`",
@ -393,30 +420,60 @@ fn create_pep517_build_environment(
.cloned()
.chain(extra_requires)
.collect();
resolve_and_install(&*venv, &requirements).map_err(Error::RequirementsInstall)?;
resolve_and_install(&*venv, &requirements, cache)
.await
.map_err(Error::RequirementsInstall)?;
}
Ok(venv)
}
#[instrument(skip_all)]
fn resolve_and_install(venv: impl AsRef<Path>, requirements: &[Requirement]) -> anyhow::Result<()> {
debug!("Calling pip to install build dependencies");
let python = Venv::new(venv.as_ref())?.python_interpreter();
// No error handling because we want have to replace this with the real resolver and installer
// anyway.
let installation = Command::new(python)
.args(["-m", "pip", "install"])
.args(
requirements
.iter()
.map(ToString::to_string)
.collect::<Vec<String>>(),
)
.output()
.context("pip install failed")?;
if !installation.status.success() {
anyhow::bail!("Installation failed :(")
}
/// Install `requirements` into the virtual environment at `venv`.
///
/// Requirements that a distribution in the local index already satisfies are
/// taken from there; the remaining ones are resolved to wheels, downloaded and
/// unpacked. When `cache` is `None`, a temporary directory is used as the
/// download/unpack target instead.
async fn resolve_and_install(
    venv: impl AsRef<Path>,
    requirements: &[Requirement],
    cache: Option<&Path>,
) -> anyhow::Result<()> {
    debug!("Installing {} build requirements", requirements.len());

    // Index the wheels that are already available in the cache, if there is one.
    let index = match cache {
        Some(dir) => LocalIndex::from_directory(dir).await?,
        None => LocalIndex::default(),
    };

    // Split the requirements into those the index can serve and those that
    // still have to be resolved remotely.
    let mut cached: Vec<LocalDistribution> = Vec::new();
    let mut pending: Vec<Requirement> = Vec::new();
    for requirement in requirements {
        let name = PackageName::normalize(&requirement.name);
        let hit = index
            .get(&name)
            .filter(|dist| requirement.is_satisfied_by(dist.version()));
        match hit {
            Some(dist) => cached.push(dist.clone()),
            None => pending.push(requirement.clone()),
        }
    }

    let client = PypiClientBuilder::default().cache(cache).build();

    // The tags are derived from the interpreter inside the target venv.
    let platform = Platform::current()?;
    let python = PythonExecutable::from_venv(platform, venv.as_ref(), cache)?;
    let tags = Tags::from_env(python.platform(), python.simple_version())?;

    // Find a wheel for every requirement that is not available locally.
    let finder = WheelFinder::new(&tags, &client);
    let resolution = finder.resolve(&pending).await?;
    let remote = resolution
        .into_files()
        .map(RemoteDistribution::from_file)
        .collect::<anyhow::Result<Vec<_>>>()?;

    // Without a cache directory, downloads and unpacked wheels go to a
    // temporary directory that lives until the end of this function.
    let staging = tempdir()?;
    let target = cache.unwrap_or(staging.path());

    let downloader = Downloader::new(&client, cache);
    let downloads = downloader.download(&remote, target).await?;
    let unzipped = Unzipper::default()
        .download(downloads, target)
        .await
        .context("Failed to download and unpack wheels")?;

    // Install the freshly unpacked wheels first, followed by the cached ones.
    let wheels: Vec<_> = unzipped.into_iter().chain(cached).collect();
    puffin_installer::Installer::new(&python).install(&wheels)?;

    Ok(())
}

View file

@ -2,6 +2,7 @@
use anyhow::Context;
use clap::Parser;
use directories::ProjectDirs;
use fs_err as fs;
use owo_colors::OwoColorize;
use puffin_build::{Error, SourceDistributionBuilder};
@ -27,7 +28,7 @@ struct Args {
sdist: PathBuf,
}
fn run() -> anyhow::Result<()> {
async fn run() -> anyhow::Result<()> {
let args = Args::parse();
let wheel_dir = if let Some(wheel_dir) = args.wheels {
fs::create_dir_all(&wheel_dir).context("Invalid wheel directory")?;
@ -36,6 +37,9 @@ fn run() -> anyhow::Result<()> {
env::current_dir()?
};
let dirs = ProjectDirs::from("", "", "puffin");
let cache = dirs.as_ref().map(ProjectDirs::cache_dir);
// TODO: That's no way to deal with paths in PATH
let base_python = which::which(args.python.unwrap_or("python3".into())).map_err(|err| {
Error::IO(io::Error::new(
@ -45,20 +49,23 @@ fn run() -> anyhow::Result<()> {
})?;
let interpreter_info = gourgeist::get_interpreter_info(&base_python)?;
let builder = SourceDistributionBuilder::setup(&args.sdist, &base_python, &interpreter_info)?;
let builder =
SourceDistributionBuilder::setup(&args.sdist, &base_python, &interpreter_info, cache)
.await?;
let wheel = builder.build(&wheel_dir)?;
println!("Wheel built to {}", wheel.display());
Ok(())
}
fn main() -> ExitCode {
#[tokio::main]
async fn main() -> ExitCode {
tracing_subscriber::registry()
.with(fmt::layer().with_span_events(FmtSpan::CLOSE))
.with(EnvFilter::from_default_env())
.init();
let start = Instant::now();
let result = run();
let result = run().await;
debug!("Took {}ms", start.elapsed().as_millis());
if let Err(err) = result {
eprintln!("{}", "puffin-build failed".red().bold());

View file

@ -53,13 +53,7 @@ pub(crate) async fn compile(
let tags = Tags::from_env(python.platform(), python.simple_version())?;
// Instantiate a client.
let client = {
let mut pypi_client = PypiClientBuilder::default();
if let Some(cache) = cache {
pypi_client = pypi_client.cache(cache);
}
pypi_client.build()
};
let client = PypiClientBuilder::default().cache(cache).build();
// Resolve the dependencies.
let resolver = puffin_resolver::Resolver::new(requirements, markers, &tags, &client);

View file

@ -1,16 +1,17 @@
use std::fmt::Write;
use std::path::Path;
use anyhow::{Context, Result};
use anyhow::{bail, Context, Result};
use bitflags::bitflags;
use itertools::{Either, Itertools};
use owo_colors::OwoColorize;
use pep508_rs::Requirement;
use tracing::debug;
use platform_host::Platform;
use platform_tags::Tags;
use puffin_client::PypiClientBuilder;
use puffin_installer::{LocalIndex, RemoteDistribution};
use puffin_installer::{LocalDistribution, LocalIndex, RemoteDistribution};
use puffin_interpreter::{PythonExecutable, SitePackages};
use puffin_package::package_name::PackageName;
use puffin_package::requirements_txt::RequirementsTxt;
@ -36,10 +37,11 @@ pub(crate) async fn sync(
flags: SyncFlags,
mut printer: Printer,
) -> Result<ExitStatus> {
let start = std::time::Instant::now();
// Read the `requirements.txt` from disk.
let requirements_txt = RequirementsTxt::parse(src, std::env::current_dir()?)?;
if !requirements_txt.constraints.is_empty() {
bail!("Constraints in requirements.txt are not supported");
}
let requirements = requirements_txt
.requirements
.into_iter()
@ -50,6 +52,18 @@ pub(crate) async fn sync(
return Ok(ExitStatus::Success);
}
sync_requirements(&requirements, cache, flags, printer).await
}
/// Install a set of locked requirements into the current Python environment.
pub(crate) async fn sync_requirements(
requirements: &[Requirement],
cache: Option<&Path>,
flags: SyncFlags,
mut printer: Printer,
) -> Result<ExitStatus> {
let start = std::time::Instant::now();
// Detect the current Python interpreter.
let platform = Platform::current()?;
let python = PythonExecutable::from_env(platform, cache)?;
@ -61,57 +75,9 @@ pub(crate) async fn sync(
// Determine the current environment markers.
let tags = Tags::from_env(python.platform(), python.simple_version())?;
// Index all the already-installed packages in site-packages.
let site_packages = if flags.intersects(SyncFlags::IGNORE_INSTALLED) {
SitePackages::default()
} else {
SitePackages::from_executable(&python).await?
};
// Index all the already-downloaded wheels in the cache.
let local_index = if let Some(cache) = cache {
LocalIndex::from_directory(cache).await?
} else {
LocalIndex::default()
};
// Filter out any already-installed or already-cached packages.
let (cached, uncached): (Vec<_>, Vec<_>) = requirements
.iter()
.filter(|requirement| {
let package = PackageName::normalize(&requirement.name);
// Filter out already-installed packages.
if let Some(dist_info) = site_packages.get(&package) {
debug!(
"Requirement already satisfied: {} ({})",
package,
dist_info.version()
);
false
} else {
true
}
})
.partition_map(|requirement| {
let package = PackageName::normalize(&requirement.name);
// Identify any locally-available distributions that satisfy the requirement.
if let Some(distribution) = local_index
.get(&package)
.filter(|dist| requirement.is_satisfied_by(dist.version()))
{
debug!(
"Requirement already cached: {} ({})",
distribution.name(),
distribution.version()
);
Either::Left(distribution.clone())
} else {
debug!("Identified uncached requirement: {}", requirement);
Either::Right(requirement.clone())
}
});
let (cached, uncached) =
find_uncached_requirements(requirements, cache, flags, &python).await?;
// Nothing to do.
if uncached.is_empty() && cached.is_empty() {
@ -130,22 +96,14 @@ pub(crate) async fn sync(
return Ok(ExitStatus::Success);
}
let client = {
let mut pypi_client = PypiClientBuilder::default();
if let Some(cache) = cache {
pypi_client = pypi_client.cache(cache);
}
pypi_client.build()
};
let client = PypiClientBuilder::default().cache(cache).build();
// Resolve the dependencies.
let resolution = if uncached.is_empty() {
puffin_resolver::Resolution::default()
} else {
let wheel_finder = puffin_resolver::WheelFinder::new(&tags, &client)
.with_reporter(WheelFinderReporter::from(printer).with_length(uncached.len() as u64));
let resolution = wheel_finder.resolve(&uncached).await?;
let wheel_finder = puffin_resolver::WheelFinder::new(&tags, &client)
.with_reporter(WheelFinderReporter::from(printer).with_length(uncached.len() as u64));
let resolution = wheel_finder.resolve(&uncached).await?;
if !resolution.is_empty() {
let s = if resolution.len() == 1 { "" } else { "s" };
writeln!(
printer,
@ -157,9 +115,7 @@ pub(crate) async fn sync(
)
.dimmed()
)?;
resolution
};
}
let start = std::time::Instant::now();
@ -256,3 +212,73 @@ pub(crate) async fn sync(
Ok(ExitStatus::Success)
}
/// Build the site-packages and local-cache indexes, then split `requirements`
/// into distributions that are already cached and requirements that are not.
///
/// With `SyncFlags::IGNORE_INSTALLED` set, an empty site-packages index is
/// used instead of reading the one belonging to `python`. Without a `cache`
/// directory, the local index is empty.
async fn find_uncached_requirements(
    requirements: &[Requirement],
    cache: Option<&Path>,
    flags: SyncFlags,
    python: &PythonExecutable,
) -> Result<(Vec<LocalDistribution>, Vec<Requirement>)> {
    // Index all the already-installed packages in site-packages.
    let ignore_installed = flags.intersects(SyncFlags::IGNORE_INSTALLED);
    let installed = if ignore_installed {
        SitePackages::default()
    } else {
        SitePackages::from_executable(python).await?
    };

    // Index all the already-downloaded wheels in the cache.
    let index = match cache {
        Some(dir) => LocalIndex::from_directory(dir).await?,
        None => LocalIndex::default(),
    };

    let split = split_uncached_requirements(requirements, &installed, &index);
    Ok(split)
}
/// Partition `requirements` into `(cached, uncached)`.
///
/// Requirements whose package is found in `site_packages` are dropped
/// entirely. Of the rest, those satisfied by a distribution in `local_index`
/// yield a clone of that distribution in the first vector; all others are
/// cloned into the second vector.
fn split_uncached_requirements(
    requirements: &[Requirement],
    site_packages: &SitePackages,
    local_index: &LocalIndex,
) -> (Vec<LocalDistribution>, Vec<Requirement>) {
    let mut cached = Vec::new();
    let mut uncached = Vec::new();

    for requirement in requirements {
        let package = PackageName::normalize(&requirement.name);

        // Skip packages that are already installed.
        if let Some(dist_info) = site_packages.get(&package) {
            debug!(
                "Requirement already satisfied: {} ({})",
                package,
                dist_info.version()
            );
            continue;
        }

        // Look for a locally-available distribution that satisfies the requirement.
        let candidate = local_index
            .get(&package)
            .filter(|dist| requirement.is_satisfied_by(dist.version()));

        match candidate {
            Some(distribution) => {
                debug!(
                    "Requirement already cached: {} ({})",
                    distribution.name(),
                    distribution.version()
                );
                cached.push(distribution.clone());
            }
            None => {
                debug!("Identified uncached requirement: {}", requirement);
                uncached.push(requirement.clone());
            }
        }
    }

    (cached, uncached)
}

View file

@ -1,4 +1,4 @@
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::sync::Arc;
use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
@ -47,8 +47,11 @@ impl PypiClientBuilder {
}
#[must_use]
pub fn cache(mut self, cache: impl AsRef<Path>) -> Self {
self.cache = Some(PathBuf::from(cache.as_ref()));
pub fn cache<T>(mut self, cache: Option<T>) -> Self
where
T: Into<PathBuf>,
{
self.cache = cache.map(Into::into);
self
}

View file

@ -17,7 +17,7 @@ puffin-package = { path = "../puffin-package" }
anyhow = { workspace = true }
cacache = { workspace = true }
fs-err = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }

View file

@ -39,6 +39,19 @@ impl PythonExecutable {
})
}
/// Create a `PythonExecutable` for the virtual environment at `venv`.
///
/// The interpreter path is derived from `platform` and `venv`, and the
/// markers are detected for that interpreter, with `cache` passed through to
/// the marker detection.
pub fn from_venv(platform: Platform, venv: &Path, cache: Option<&Path>) -> Result<Self> {
    let python_platform = PythonPlatform::from(platform);
    let interpreter = python_platform.venv_python(venv);
    let markers = markers::detect_cached_markers(&interpreter, cache)?;

    Ok(Self {
        platform: python_platform,
        venv: venv.to_path_buf(),
        executable: interpreter,
        markers,
    })
}
/// Returns the path to the Python virtual environment.
pub fn platform(&self) -> &Platform {
&self.platform