Avoid passing cached wheels to the resolver step (#70)

When we go to install a locked `requirements.txt`, if a wheel is already
available in the local cache, and matches the version specifiers, we can
just use it directly without fetching the package metadata. This speeds
up the no-op case by about 33%.

Closes https://github.com/astral-sh/puffin/issues/48.
This commit is contained in:
Charlie Marsh 2023-10-08 22:17:19 -04:00 committed by GitHub
parent 75cb7a0178
commit ba72950546
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 534 additions and 287 deletions

1
Cargo.lock generated
View file

@ -1780,6 +1780,7 @@ dependencies = [
"anyhow",
"cacache",
"install-wheel-rs",
"pep440_rs",
"puffin-client",
"puffin-interpreter",
"puffin-package",

View file

@ -1,10 +1,12 @@
use clap::Parser;
use fs_err::File;
use install_wheel_rs::{install_wheel, Error, InstallLocation};
#[cfg(feature = "rayon")]
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::path::PathBuf;
use std::str::FromStr;
use clap::Parser;
use fs_err::File;
#[cfg(feature = "rayon")]
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use install_wheel_rs::{install_wheel, Error, InstallLocation};
use wheel_filename::WheelFilename;
/// Low level install CLI, mainly used for testing

View file

@ -10,8 +10,6 @@ use fs_err::File;
use mailparse::MailHeaderMap;
use tracing::{debug, span, Level};
use wheel_filename::WheelFilename;
use crate::install_location::{InstallLocation, LockedDir};
use crate::wheel::{
extra_dist_info, install_data, parse_wheel_version, read_scripts_from_section,
@ -26,14 +24,7 @@ use crate::{read_record_file, Error, Script};
/// <https://packaging.python.org/en/latest/specifications/binary-distribution-format/#installing-a-wheel-distribution-1-0-py32-none-any-whl>
///
/// Wheel 1.0: <https://www.python.org/dev/peps/pep-0427/>
pub fn install_wheel(
location: &InstallLocation<LockedDir>,
wheel: &Path,
filename: &WheelFilename,
) -> Result<String, Error> {
let name = &filename.distribution;
let _my_span = span!(Level::DEBUG, "install_wheel", name = name.as_str());
pub fn install_wheel(location: &InstallLocation<LockedDir>, wheel: &Path) -> Result<(), Error> {
let base_location = location.venv_base();
// TODO(charlie): Pass this in.
@ -52,10 +43,10 @@ pub fn install_wheel(
.join("site-packages")
};
debug!(name = name.as_str(), "Getting wheel metadata");
let dist_info_prefix = find_dist_info(wheel)?;
let (name, _version) = read_metadata(&dist_info_prefix, wheel)?;
// TODO: Check that name and version match
let _my_span = span!(Level::DEBUG, "install_wheel", name);
// We're going step by step though
// https://packaging.python.org/en/latest/specifications/binary-distribution-format/#installing-a-wheel-distribution-1-0-py32-none-any-whl
@ -68,15 +59,15 @@ pub fn install_wheel(
// > 1.c If Root-Is-Purelib == true, unpack archive into purelib (site-packages).
// > 1.d Else unpack archive into platlib (site-packages).
// We always install in the same virtualenv site packages
debug!(name = name.as_str(), "Extracting file");
debug!(name, "Extracting file");
let num_unpacked = unpack_wheel_files(&site_packages, wheel)?;
debug!(name = name.as_str(), "Extracted {num_unpacked} files");
debug!(name, "Extracted {num_unpacked} files");
// Read the RECORD file.
let mut record_file = File::open(&wheel.join(format!("{dist_info_prefix}.dist-info/RECORD")))?;
let mut record = read_record_file(&mut record_file)?;
debug!(name = name.as_str(), "Writing entrypoints");
debug!(name, "Writing entrypoints");
let (console_scripts, gui_scripts) = parse_scripts(wheel, &dist_info_prefix, None)?;
write_script_entrypoints(&site_packages, location, &console_scripts, &mut record)?;
write_script_entrypoints(&site_packages, location, &gui_scripts, &mut record)?;
@ -85,7 +76,7 @@ pub fn install_wheel(
// 2.a Unpacked archive includes distribution-1.0.dist-info/ and (if there is data) distribution-1.0.data/.
// 2.b Move each subtree of distribution-1.0.data/ onto its destination path. Each subdirectory of distribution-1.0.data/ is a key into a dict of destination directories, such as distribution-1.0.data/(purelib|platlib|headers|scripts|data). The initially supported paths are taken from distutils.command.install.
if data_dir.is_dir() {
debug!(name = name.as_str(), "Installing data");
debug!(name, "Installing data");
install_data(
base_location.as_ref(),
&site_packages,
@ -101,14 +92,14 @@ pub fn install_wheel(
// 2.e Remove empty distribution-1.0.data directory.
fs::remove_dir_all(data_dir)?;
} else {
debug!(name = name.as_str(), "No data");
debug!(name, "No data");
}
debug!(name = name.as_str(), "Writing extra metadata");
debug!(name, "Writing extra metadata");
extra_dist_info(&site_packages, &dist_info_prefix, true, &mut record)?;
debug!(name = name.as_str(), "Writing record");
debug!(name, "Writing record");
let mut record_writer = csv::WriterBuilder::new()
.has_headers(false)
.escape(b'"')
@ -118,7 +109,7 @@ pub fn install_wheel(
record_writer.serialize(entry)?;
}
Ok(filename.get_tag())
Ok(())
}
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or

View file

@ -302,6 +302,25 @@ impl Requirement {
}
impl Requirement {
/// Returns `true` if the [`Version`] satisfies the [`Requirement`].
pub fn is_satisfied_by(&self, version: &Version) -> bool {
    match self.version_or_url.as_ref() {
        // Every specifier in the set must admit the candidate version.
        Some(VersionOrUrl::VersionSpecifier(specifiers)) => specifiers
            .iter()
            .all(|specifier| specifier.contains(version)),
        // TODO(charlie): Support URL dependencies.
        Some(VersionOrUrl::Url(_)) => false,
        // A requirement with no specifier at all is treated as unsatisfied.
        None => false,
    }
}
/// Returns whether the markers apply for the given environment
pub fn evaluate_markers(&self, env: &MarkerEnvironment, extras: &[String]) -> bool {
if let Some(marker) = &self.marker {
@ -572,12 +591,12 @@ fn parse_extras(chars: &mut CharIter) -> Result<Option<Vec<String>>, Pep508Error
Some((pos, other)) => {
return Err(Pep508Error {
message: Pep508ErrorSource::String(format!(
"Expected an alphanumeric character starting the extra name, found '{other}'"
)),
"Expected an alphanumeric character starting the extra name, found '{other}'"
)),
start: pos,
len: 1,
input: chars.copy_chars(),
})
});
}
None => return Err(early_eof_error),
}
@ -601,10 +620,9 @@ fn parse_extras(chars: &mut CharIter) -> Result<Option<Vec<String>>, Pep508Error
start: pos,
len: 1,
input: chars.copy_chars(),
})
});
}
_=>{}
_ => {}
};
// wsp* after the identifier
chars.eat_whitespace();
@ -625,7 +643,7 @@ fn parse_extras(chars: &mut CharIter) -> Result<Option<Vec<String>>, Pep508Error
start: pos,
len: 1,
input: chars.copy_chars(),
})
});
}
None => return Err(early_eof_error),
}
@ -734,7 +752,7 @@ fn parse_version_specifier_parentheses(
let specifier = parse_specifier(chars, &buffer, start, end)?;
specifiers.push(specifier);
break Some(VersionOrUrl::VersionSpecifier(specifiers.into_iter().collect()));
},
}
Some((_, char)) => buffer.push(char),
None => return Err(Pep508Error {
message: Pep508ErrorSource::String("Missing closing parenthesis (expected ')', found end of dependency specification)".to_string()),
@ -789,7 +807,7 @@ fn parse(chars: &mut CharIter) -> Result<Requirement, Pep508Error> {
start: chars.get_pos(),
len: 1,
input: chars.copy_chars(),
})
});
}
};
@ -1036,6 +1054,7 @@ mod tests {
},
);
}
#[test]
fn error_extras_illegal_character() {
assert_err(

View file

@ -8,6 +8,7 @@ use tracing::{debug, info};
use platform_host::Platform;
use platform_tags::Tags;
use puffin_client::PypiClientBuilder;
use puffin_installer::{Distribution, RemoteDistribution};
use puffin_interpreter::{PythonExecutable, SitePackages};
use puffin_package::package_name::PackageName;
use puffin_package::requirements::Requirements;
@ -71,6 +72,35 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>, flags: SyncFlags) ->
return Ok(ExitStatus::Success);
}
// Detect any cached wheels.
let (uncached, cached) = if let Some(cache) = cache {
let mut cached = Vec::with_capacity(requirements.len());
let mut uncached = Vec::with_capacity(requirements.len());
let index = puffin_installer::LocalIndex::from_directory(cache).await?;
for requirement in requirements {
let package = PackageName::normalize(&requirement.name);
if let Some(distribution) = index
.get(&package)
.filter(|dist| requirement.is_satisfied_by(dist.version()))
{
debug!(
"Requirement already cached: {} ({})",
distribution.name(),
distribution.version()
);
cached.push(distribution.clone());
} else {
debug!("Identified uncached requirement: {}", requirement);
uncached.push(requirement);
}
}
(Requirements::new(uncached), cached)
} else {
(requirements, Vec::new())
};
// Determine the current environment markers.
let markers = python.markers();
@ -87,23 +117,35 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>, flags: SyncFlags) ->
};
// Resolve the dependencies.
let resolution = puffin_resolver::resolve(
&requirements,
markers,
&tags,
&client,
puffin_resolver::ResolveFlags::NO_DEPS,
)
.await?;
let resolution = if uncached.is_empty() {
puffin_resolver::Resolution::empty()
} else {
puffin_resolver::resolve(
&uncached,
markers,
&tags,
&client,
puffin_resolver::ResolveFlags::NO_DEPS,
)
.await?
};
// Install into the current environment.
let wheels = resolution.into_files().collect::<Vec<_>>();
let wheels = cached
.into_iter()
.map(|local| Ok(Distribution::Local(local)))
.chain(
resolution
.into_files()
.map(|file| Ok(Distribution::Remote(RemoteDistribution::from_file(file)?))),
)
.collect::<Result<Vec<_>>>()?;
puffin_installer::install(&wheels, &python, &client, cache).await?;
let s = if requirements.len() == 1 { "" } else { "s" };
let s = if wheels.len() == 1 { "" } else { "s" };
info!(
"Installed {} package{} in {}",
requirements.len(),
wheels.len(),
s,
elapsed(start.elapsed())
);

View file

@ -11,6 +11,7 @@ license.workspace = true
[dependencies]
install-wheel-rs = { path = "../install-wheel-rs", default-features = false }
pep440_rs = { path = "../pep440-rs" }
puffin-client = { path = "../puffin-client" }
puffin-interpreter = { path = "../puffin-interpreter" }
puffin-package = { path = "../puffin-package" }

View file

@ -0,0 +1,30 @@
use std::path::{Path, PathBuf};
/// Subdirectory of the cache root that holds unzipped wheels.
static WHEEL_CACHE: &str = "wheels-v0";

/// A borrowed handle to the on-disk wheel cache rooted at `path`.
#[derive(Debug)]
pub(crate) struct WheelCache<'a> {
    path: &'a Path,
}

impl<'a> WheelCache<'a> {
    /// Create a handle to the wheel cache.
    pub(crate) fn new(path: &'a Path) -> Self {
        Self { path }
    }

    /// The directory under which every cached wheel lives.
    fn bucket(&self) -> PathBuf {
        self.path.join(WHEEL_CACHE)
    }

    /// Return the path at which a given wheel would be stored.
    pub(crate) fn entry(&self, id: &str) -> PathBuf {
        self.bucket().join(id)
    }

    /// Initialize the wheel cache, creating the directory if necessary.
    pub(crate) async fn init(&self) -> std::io::Result<()> {
        tokio::fs::create_dir_all(self.bucket()).await
    }

    /// Returns a handle over the entries of the wheel cache directory.
    pub(crate) async fn read_dir(&self) -> std::io::Result<tokio::fs::ReadDir> {
        tokio::fs::read_dir(self.bucket()).await
    }
}

View file

@ -0,0 +1,131 @@
use std::path::{Path, PathBuf};
use std::str::FromStr;
use anyhow::{anyhow, Result};
use pep440_rs::Version;
use puffin_client::File;
use puffin_package::package_name::PackageName;
use wheel_filename::WheelFilename;
/// A built distribution (wheel), which either exists remotely or locally.
#[derive(Debug, Clone)]
pub enum Distribution {
    Remote(RemoteDistribution),
    Local(LocalDistribution),
}

impl Distribution {
    /// Return the normalized [`PackageName`] of the distribution.
    pub fn name(&self) -> &PackageName {
        match self {
            Self::Remote(remote) => remote.name(),
            Self::Local(local) => local.name(),
        }
    }

    /// Return the [`Version`] of the distribution.
    pub fn version(&self) -> &Version {
        match self {
            Self::Remote(remote) => remote.version(),
            Self::Local(local) => local.version(),
        }
    }

    /// Return an identifier for the distribution: `<distribution>-<version>`, where
    /// `distribution` is the normalized package name with hyphens replaced by
    /// underscores. This matches the `.dist-info` directory name minus its suffix.
    pub fn id(&self) -> String {
        match self {
            Self::Remote(remote) => remote.id(),
            Self::Local(local) => local.id(),
        }
    }
}
/// A built distribution (wheel) that exists as a remote file (e.g., on `PyPI`).
#[derive(Debug, Clone)]
pub struct RemoteDistribution {
    name: PackageName,
    version: Version,
    file: File,
}

impl RemoteDistribution {
    /// Try to parse a remote distribution from a remote file (like `django-5.0a1-py3-none-any.whl`).
    pub fn from_file(file: File) -> Result<Self> {
        let filename = WheelFilename::from_str(&file.filename)?;
        let version = Version::from_str(&filename.version).map_err(|err| anyhow!(err))?;
        Ok(Self {
            name: PackageName::normalize(&filename.distribution),
            version,
            file,
        })
    }

    /// The normalized package name.
    pub fn name(&self) -> &PackageName {
        &self.name
    }

    /// The parsed wheel version.
    pub fn version(&self) -> &Version {
        &self.version
    }

    /// The remote file backing this distribution.
    pub fn file(&self) -> &File {
        &self.file
    }

    /// `<name>-<version>`, with hyphens in the name replaced by underscores.
    pub fn id(&self) -> String {
        format!("{}-{}", self.name().replace('-', "_"), self.version())
    }
}
/// A built distribution (wheel) that exists as a local file (e.g., in the wheel cache).
#[derive(Debug, Clone)]
pub struct LocalDistribution {
    name: PackageName,
    version: Version,
    path: PathBuf,
}

impl LocalDistribution {
    /// Try to parse a cached distribution from a directory name (like `django-5.0a1`).
    ///
    /// Returns `Ok(None)` for paths that cannot possibly be cache entries; an
    /// unparseable version is an error.
    pub(crate) fn try_from_path(path: &Path) -> Result<Option<Self>> {
        // Cache directory names follow `<name>-<version>`, where `name` uses
        // underscores in place of hyphens, so the first `-` separates the two.
        let Some(stem) = path.file_name().and_then(|file_name| file_name.to_str()) else {
            return Ok(None);
        };
        let Some((name, version)) = stem.split_once('-') else {
            return Ok(None);
        };

        Ok(Some(Self {
            name: PackageName::normalize(name),
            version: Version::from_str(version).map_err(|err| anyhow!(err))?,
            path: path.to_path_buf(),
        }))
    }

    /// The normalized package name.
    pub fn name(&self) -> &PackageName {
        &self.name
    }

    /// The parsed wheel version.
    pub fn version(&self) -> &Version {
        &self.version
    }

    /// The on-disk location of the unpacked wheel.
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// `<name>-<version>`, with hyphens in the name replaced by underscores.
    pub fn id(&self) -> String {
        format!("{}-{}", self.name().replace('-', "_"), self.version())
    }
}

View file

@ -0,0 +1,40 @@
use std::collections::BTreeMap;
use std::path::Path;
use anyhow::Result;
use crate::cache::WheelCache;
use puffin_package::package_name::PackageName;
use crate::distribution::LocalDistribution;
/// A local index of cached distributions.
#[derive(Debug)]
pub struct LocalIndex(BTreeMap<PackageName, LocalDistribution>);

impl LocalIndex {
    /// Build an index of cached distributions from a directory.
    pub async fn from_directory(path: &Path) -> Result<Self> {
        let mut packages = BTreeMap::new();

        // A missing or unreadable cache directory is treated as an empty index.
        let Ok(mut entries) = WheelCache::new(path).read_dir().await else {
            return Ok(Self(packages));
        };

        while let Some(entry) = entries.next_entry().await? {
            if !entry.file_type().await?.is_dir() {
                continue;
            }
            if let Some(dist_info) = LocalDistribution::try_from_path(&entry.path())? {
                packages.insert(dist_info.name().clone(), dist_info);
            }
        }

        Ok(Self(packages))
    }

    /// Returns a distribution from the index, if it exists.
    pub fn get(&self, name: &PackageName) -> Option<&LocalDistribution> {
        self.0.get(name)
    }
}

View file

@ -0,0 +1,210 @@
use std::path::Path;
use anyhow::Result;
use cacache::{Algorithm, Integrity};
use rayon::iter::ParallelBridge;
use rayon::iter::ParallelIterator;
use tokio::task::JoinSet;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::{debug, info};
use url::Url;
use zip::ZipArchive;
use install_wheel_rs::{unpacked, InstallLocation};
use puffin_client::PypiClient;
use puffin_interpreter::PythonExecutable;
use crate::cache::WheelCache;
use crate::distribution::{Distribution, RemoteDistribution};
use crate::vendor::CloneableSeekableReader;
/// Install a set of wheels into a Python virtual environment.
///
/// Runs in three phases: (1) fetch any remote wheels concurrently, (2) unzip
/// each download into a staging directory and promote it into the wheel cache,
/// (3) link every wheel — cached or freshly unzipped — into the venv.
pub async fn install(
    wheels: &[Distribution],
    python: &PythonExecutable,
    client: &PypiClient,
    cache: Option<&Path>,
) -> Result<()> {
    if wheels.is_empty() {
        return Ok(());
    }

    // Create the wheel cache subdirectory, if necessary.
    let wheel_cache = cache.map(WheelCache::new);
    if let Some(wheel_cache) = wheel_cache.as_ref() {
        wheel_cache.init().await?;
    }

    // Phase 1: Fetch the wheels in parallel.
    let mut fetches = JoinSet::new();
    let mut downloads = Vec::with_capacity(wheels.len());
    for wheel in wheels {
        // Locally-cached distributions skip the fetch entirely.
        let Distribution::Remote(remote) = wheel else {
            continue;
        };

        debug!("Downloading: {}", remote.file().filename);
        fetches.spawn(fetch_wheel(
            remote.clone(),
            client.clone(),
            cache.map(Path::to_path_buf),
        ));
    }
    if !fetches.is_empty() {
        let s = if fetches.len() == 1 { "" } else { "s" };
        info!("Downloading {} wheel{}", fetches.len(), s);
    }
    // Propagate both join failures and fetch errors.
    while let Some(result) = fetches.join_next().await.transpose()? {
        downloads.push(result?);
    }

    if !downloads.is_empty() {
        let s = if downloads.len() == 1 { "" } else { "s" };
        debug!("Unpacking {} wheel{}", downloads.len(), s);
    }

    let staging = tempfile::tempdir()?;

    // Phase 2: Unpack the wheels into the cache.
    for download in downloads {
        let filename = download.remote.file().filename.clone();
        let id = download.remote.id();

        debug!("Unpacking: {}", filename);
        // Unzip the wheel on a blocking thread; `target` is moved into the closure.
        tokio::task::spawn_blocking({
            let target = staging.path().join(&id);
            move || unzip_wheel(download, &target)
        })
        .await??;

        // Write the unzipped wheel to the cache (atomically).
        // NOTE(review): `rename` is only atomic (and only succeeds) when the
        // staging temp dir and the cache share a filesystem — confirm that
        // holds, or this can fail with a cross-device link error.
        if let Some(wheel_cache) = wheel_cache.as_ref() {
            debug!("Caching wheel: {}", filename);
            tokio::fs::rename(staging.path().join(&id), wheel_cache.entry(&id)).await?;
        }
    }

    let s = if wheels.len() == 1 { "" } else { "s" };
    info!(
        "Linking package{}: {}",
        s,
        wheels
            .iter()
            .map(Distribution::id)
            .collect::<Vec<_>>()
            .join(" ")
    );

    // Phase 3: Install each wheel.
    let location = InstallLocation::new(python.venv().to_path_buf(), python.simple_version());
    let locked_dir = location.acquire_lock()?;

    for wheel in wheels {
        match wheel {
            Distribution::Remote(remote) => {
                // Remote wheels were unzipped into the cache (or, with no
                // cache configured, left in the staging directory).
                let id = remote.id();
                let dir = wheel_cache.as_ref().map_or_else(
                    || staging.path().join(&id),
                    |wheel_cache| wheel_cache.entry(&id),
                );
                unpacked::install_wheel(&locked_dir, &dir)?;
            }
            Distribution::Local(local) => {
                // Cached wheels install directly from their cache entry.
                unpacked::install_wheel(&locked_dir, local.path())?;
            }
        }
    }

    Ok(())
}
/// A wheel that has been fetched (from the cache or the network) into memory,
/// but not yet unzipped.
#[derive(Debug, Clone)]
struct InMemoryDistribution {
    /// The remote file from which this wheel was downloaded.
    remote: RemoteDistribution,
    /// The contents of the wheel.
    buffer: Vec<u8>,
}
/// Download a wheel to a given path.
///
/// Reads from the content-addressed cache when the wheel's SHA-256 is already
/// present; otherwise streams the file from the remote URL and writes the
/// bytes back into the cache.
async fn fetch_wheel(
    remote: RemoteDistribution,
    client: PypiClient,
    cache: Option<impl AsRef<Path>>,
) -> Result<InMemoryDistribution> {
    // Parse the wheel's SRI.
    let sri = Integrity::from_hex(&remote.file().hashes.sha256, Algorithm::Sha256)?;

    // Read from the cache, if possible.
    if let Some(cache) = cache.as_ref() {
        if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
            debug!("Extracted wheel from cache: {:?}", remote.file().filename);
            return Ok(InMemoryDistribution { remote, buffer });
        }
    }

    let url = Url::parse(&remote.file().url)?;
    let reader = client.stream_external(&url).await?;

    // Read into a buffer, pre-sized from the index's reported file size.
    let mut buffer = Vec::with_capacity(remote.file().size);
    let mut reader = tokio::io::BufReader::new(reader.compat());
    tokio::io::copy(&mut reader, &mut buffer).await?;

    // Write the buffer to the cache, if possible.
    // NOTE(review): the downloaded bytes are never verified against `sri`
    // before being cached — confirm whether hash verification is intended here.
    if let Some(cache) = cache.as_ref() {
        cacache::write_hash(&cache, &buffer).await?;
    }

    Ok(InMemoryDistribution { remote, buffer })
}
/// Write a wheel into the target directory.
fn unzip_wheel(wheel: InMemoryDistribution, target: &Path) -> Result<()> {
    // Wrap the in-memory buffer so each rayon worker can hold its own
    // cursor into the same bytes.
    let cursor = std::io::Cursor::new(wheel.buffer);
    let archive = ZipArchive::new(CloneableSeekableReader::new(cursor))?;

    // Unzip in parallel, one archive entry per task; the first error aborts.
    (0..archive.len())
        .par_bridge()
        .map(|index| {
            let mut archive = archive.clone();
            let mut entry = archive.by_index(index)?;

            // Skip entries whose names would escape the target directory.
            let Some(relative) = entry.enclosed_name().map(Path::to_owned) else {
                return Ok(());
            };

            // Create necessary parent directories.
            let absolute = target.join(relative);
            if let Some(parent) = absolute.parent() {
                std::fs::create_dir_all(parent)?;
            }

            // Write the file.
            let mut outfile = std::fs::File::create(&absolute)?;
            std::io::copy(&mut entry, &mut outfile)?;

            // Preserve Unix permission bits recorded in the archive.
            #[cfg(unix)]
            {
                use std::fs::Permissions;
                use std::os::unix::fs::PermissionsExt;

                if let Some(mode) = entry.unix_mode() {
                    std::fs::set_permissions(&absolute, Permissions::from_mode(mode))?;
                }
            }

            Ok(())
        })
        .collect::<Result<_>>()
}

View file

@ -1,230 +1,9 @@
use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use cacache::{Algorithm, Integrity};
use rayon::iter::ParallelBridge;
use rayon::iter::ParallelIterator;
use tokio::task::JoinSet;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use tracing::{debug, info};
use url::Url;
use zip::ZipArchive;
use install_wheel_rs::{unpacked, InstallLocation};
use puffin_client::{File, PypiClient};
use puffin_interpreter::PythonExecutable;
use puffin_package::package_name::PackageName;
use wheel_filename::WheelFilename;
use crate::vendor::CloneableSeekableReader;
pub use distribution::{Distribution, RemoteDistribution};
pub use index::LocalIndex;
pub use install::install;
mod cache;
mod distribution;
mod index;
mod install;
mod vendor;
static WHEEL_CACHE: &str = "wheels-v0";
/// Install a set of wheels into a Python virtual environment.
pub async fn install(
wheels: &[File],
python: &PythonExecutable,
client: &PypiClient,
cache: Option<&Path>,
) -> Result<()> {
// Create the cache subdirectory, if necessary.
if let Some(cache) = cache {
tokio::fs::create_dir_all(cache.join(WHEEL_CACHE)).await?;
}
if wheels.is_empty() {
return Ok(());
}
// Phase 1: Fetch the wheels in parallel.
let mut fetches = JoinSet::new();
let mut downloads = Vec::with_capacity(wheels.len());
for wheel in wheels {
// If the unzipped wheel exists in the cache, skip it.
let key = cache_key(wheel)?;
if let Some(cache) = cache {
if cache.join(WHEEL_CACHE).join(&key).exists() {
debug!("Found wheel in cache: {}", wheel.filename);
continue;
}
}
debug!("Downloading: {}", wheel.filename);
fetches.spawn(fetch_wheel(
wheel.clone(),
client.clone(),
cache.map(Path::to_path_buf),
));
}
if !fetches.is_empty() {
let s = if fetches.len() == 1 { "" } else { "s" };
info!("Downloading {} wheel{}", fetches.len(), s);
}
while let Some(result) = fetches.join_next().await.transpose()? {
downloads.push(result?);
}
if !downloads.is_empty() {
let s = if downloads.len() == 1 { "" } else { "s" };
debug!("Unpacking {} wheel{}", downloads.len(), s);
}
let temp_dir = tempfile::tempdir()?;
// Phase 2: Unpack the wheels into the cache.
for wheel in downloads {
let filename = wheel.file.filename.clone();
let key = cache_key(&wheel.file)?;
debug!("Unpacking: {}", filename);
// Unzip the wheel.
tokio::task::spawn_blocking({
let target = temp_dir.path().join(&key);
move || unzip_wheel(wheel, &target)
})
.await??;
// Write the unzipped wheel to the cache (atomically).
if let Some(cache) = cache {
debug!("Caching wheel: {}", filename);
tokio::fs::rename(
temp_dir.path().join(&key),
cache.join(WHEEL_CACHE).join(&key),
)
.await?;
}
}
let s = if wheels.len() == 1 { "" } else { "s" };
info!(
"Linking package{}: {}",
s,
wheels
.iter()
.map(cache_key)
.collect::<Result<Vec<_>>>()?
.join(" ")
);
// Phase 3: Install each wheel.
let location = InstallLocation::new(python.venv().to_path_buf(), python.simple_version());
let locked_dir = location.acquire_lock()?;
for wheel in wheels {
let key = cache_key(wheel)?;
let dir = cache.map_or_else(
|| temp_dir.path().join(&key),
|cache| cache.join(WHEEL_CACHE).join(&key),
);
let wheel_filename = WheelFilename::from_str(&wheel.filename)?;
// TODO(charlie): Should this be async?
unpacked::install_wheel(&locked_dir, &dir, &wheel_filename)?;
}
Ok(())
}
/// Return the cache key for an unzipped wheel. The cache key should be equivalent to the
/// `.dist-info` directory name, i.e., `<name>-<version>.dist-info`, where `name` is the
/// normalized package name.
fn cache_key(wheel: &File) -> Result<String> {
let filename = WheelFilename::from_str(&wheel.filename)?;
Ok(format!(
"{}-{}",
PackageName::normalize(filename.distribution),
filename.version
))
}
#[derive(Debug, Clone)]
struct FetchedWheel {
file: File,
buffer: Vec<u8>,
}
/// Download a wheel to a given path.
async fn fetch_wheel(
file: File,
client: PypiClient,
cache: Option<impl AsRef<Path>>,
) -> Result<FetchedWheel> {
// Parse the wheel's SRI.
let sri = Integrity::from_hex(&file.hashes.sha256, Algorithm::Sha256)?;
// Read from the cache, if possible.
if let Some(cache) = cache.as_ref() {
if let Ok(buffer) = cacache::read_hash(&cache, &sri).await {
debug!("Extracted wheel from cache: {:?}", file.filename);
return Ok(FetchedWheel { file, buffer });
}
}
let url = Url::parse(&file.url)?;
let reader = client.stream_external(&url).await?;
// Read into a buffer.
let mut buffer = Vec::with_capacity(file.size);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;
// Write the buffer to the cache, if possible.
if let Some(cache) = cache.as_ref() {
cacache::write_hash(&cache, &buffer).await?;
}
Ok(FetchedWheel { file, buffer })
}
/// Write a wheel into the target directory.
fn unzip_wheel(wheel: FetchedWheel, target: &Path) -> Result<()> {
// Read the wheel into a buffer.
let reader = std::io::Cursor::new(wheel.buffer);
let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
// Unzip in parallel.
(0..archive.len())
.par_bridge()
.map(|file_number| {
let mut archive = archive.clone();
let mut file = archive.by_index(file_number)?;
// Determine the path of the file within the wheel.
let file_path = match file.enclosed_name() {
Some(path) => path.to_owned(),
None => return Ok(()),
};
// Create necessary parent directories.
let path = target.join(file_path);
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
// Write the file.
let mut outfile = std::fs::File::create(&path)?;
std::io::copy(&mut file, &mut outfile)?;
// Set permissions.
#[cfg(unix)]
{
use std::fs::Permissions;
use std::os::unix::fs::PermissionsExt;
if let Some(mode) = file.unix_mode() {
std::fs::set_permissions(&path, Permissions::from_mode(mode))?;
}
}
Ok(())
})
.collect::<Result<_>>()
}

View file

@ -44,6 +44,15 @@ impl Requirements {
}
}
// Consuming iteration over the underlying `Vec<Requirement>`, preserving
// insertion order.
impl IntoIterator for Requirements {
    type Item = Requirement;
    type IntoIter = std::vec::IntoIter<Self::Item>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}
impl FromStr for Requirements {
type Err = Pep508Error;

View file

@ -9,7 +9,7 @@ use thiserror::Error;
use tracing::{debug, info};
use pep440_rs::Version;
use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl};
use pep508_rs::{MarkerEnvironment, Requirement};
use platform_tags::Tags;
use puffin_client::{File, PypiClient, SimpleJson};
use puffin_package::metadata::Metadata21;
@ -21,6 +21,10 @@ use wheel_filename::WheelFilename;
pub struct Resolution(HashMap<PackageName, PinnedPackage>);
impl Resolution {
/// Create a resolution containing no pinned packages.
pub fn empty() -> Self {
    Self(HashMap::default())
}
/// Iterate over the pinned packages in this resolution.
pub fn iter(&self) -> impl Iterator<Item = (&PackageName, &PinnedPackage)> {
self.0.iter()
@ -128,16 +132,6 @@ pub async fn resolve(
let result: Response = result?;
match result {
Response::Package(requirement, metadata) => {
// TODO(charlie): Support URLs. Right now, we treat a URL as an unpinned dependency.
let specifiers =
requirement
.version_or_url
.as_ref()
.and_then(|version_or_url| match version_or_url {
VersionOrUrl::VersionSpecifier(specifiers) => Some(specifiers),
VersionOrUrl::Url(_) => None,
});
// Pick a version that satisfies the requirement.
let Some(file) = metadata.files.iter().rev().find(|file| {
// We only support wheels for now.
@ -153,9 +147,7 @@ pub async fn resolve(
return false;
}
specifiers
.iter()
.all(|specifier| specifier.contains(&version))
requirement.is_satisfied_by(&version)
}) else {
return Err(ResolveError::NotFound(requirement));
};