mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-02 12:59:45 +00:00
Move source distribution unpacking out of build (#2294)
## Summary If a user provides a source distribution via a direct path, it can either be an archive (like a `.tar.gz` or `.zip` file) or a directory. If the former, we need to extract (e.g., unzip) the contents at some point. Previously, this extraction was in `uv-build`; this PR lifts it up to the distribution database. The first benefit here is that various methods that take the distribution are now simpler, as they can assume a directory. The second benefit is that we no longer extract _multiple times_ when working with a source distribution. (Previously, if we tried to get the metadata, then fell back and built the wheel, we'd extract the wheel _twice_.)
This commit is contained in:
parent
e321a2767d
commit
26f6919465
7 changed files with 107 additions and 47 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -4282,7 +4282,6 @@ dependencies = [
|
|||
"tokio",
|
||||
"toml",
|
||||
"tracing",
|
||||
"uv-extract",
|
||||
"uv-fs",
|
||||
"uv-interpreter",
|
||||
"uv-traits",
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ distribution-types = { path = "../distribution-types" }
|
|||
pep508_rs = { path = "../pep508-rs" }
|
||||
platform-host = { path = "../platform-host" }
|
||||
pypi-types = { path = "../pypi-types" }
|
||||
uv-extract = { path = "../uv-extract" }
|
||||
uv-fs = { path = "../uv-fs" }
|
||||
uv-interpreter = { path = "../uv-interpreter" }
|
||||
uv-traits = { path = "../uv-traits", features = ["serde"] }
|
||||
|
|
|
|||
|
|
@ -62,10 +62,6 @@ static DEFAULT_BACKEND: Lazy<Pep517Backend> = Lazy::new(|| Pep517Backend {
|
|||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
IO(#[from] io::Error),
|
||||
#[error("Failed to extract archive: {0}")]
|
||||
Extraction(PathBuf, #[source] uv_extract::Error),
|
||||
#[error("Unsupported archive format (extension not recognized): {0}")]
|
||||
UnsupportedArchiveType(String),
|
||||
#[error("Invalid source distribution: {0}")]
|
||||
InvalidSourceDist(String),
|
||||
#[error("Invalid pyproject.toml")]
|
||||
|
|
@ -74,8 +70,6 @@ pub enum Error {
|
|||
EditableSetupPy,
|
||||
#[error("Failed to install requirements from {0}")]
|
||||
RequirementsInstall(&'static str, #[source] anyhow::Error),
|
||||
#[error("Source distribution not found at: {0}")]
|
||||
NotFound(PathBuf),
|
||||
#[error("Failed to create temporary virtualenv")]
|
||||
Virtualenv(#[from] uv_virtualenv::Error),
|
||||
#[error("Failed to run {0}")]
|
||||
|
|
@ -365,38 +359,10 @@ impl SourceBuild {
|
|||
) -> Result<Self, Error> {
|
||||
let temp_dir = tempdir_in(build_context.cache().root())?;
|
||||
|
||||
let metadata = match fs::metadata(source) {
|
||||
Ok(metadata) => metadata,
|
||||
Err(err) if err.kind() == io::ErrorKind::NotFound => {
|
||||
return Err(Error::NotFound(source.to_path_buf()));
|
||||
}
|
||||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
|
||||
let source_root = if metadata.is_dir() {
|
||||
source.to_path_buf()
|
||||
} else {
|
||||
debug!("Unpacking for build: {}", source.display());
|
||||
|
||||
let extracted = temp_dir.path().join("extracted");
|
||||
|
||||
// Unzip the archive into the temporary directory.
|
||||
let reader = fs_err::tokio::File::open(source).await?;
|
||||
uv_extract::stream::archive(tokio::io::BufReader::new(reader), source, &extracted)
|
||||
.await
|
||||
.map_err(|err| Error::Extraction(extracted.clone(), err))?;
|
||||
|
||||
// Extract the top-level directory from the archive.
|
||||
match uv_extract::strip_component(&extracted) {
|
||||
Ok(top_level) => top_level,
|
||||
Err(uv_extract::Error::NonSingularArchive(_)) => extracted,
|
||||
Err(err) => return Err(Error::Extraction(extracted.clone(), err)),
|
||||
}
|
||||
};
|
||||
let source_tree = if let Some(subdir) = subdirectory {
|
||||
source_root.join(subdir)
|
||||
source.join(subdir)
|
||||
} else {
|
||||
source_root
|
||||
source.to_path_buf()
|
||||
};
|
||||
|
||||
let default_backend: Pep517Backend = DEFAULT_BACKEND.clone();
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ pub(crate) struct BuildArgs {
|
|||
/// Directory to story the built wheel in
|
||||
#[clap(short, long)]
|
||||
wheels: Option<PathBuf>,
|
||||
/// The source distribution to build, either a directory or a source archive.
|
||||
/// The source distribution to build, as a directory.
|
||||
sdist: PathBuf,
|
||||
/// The subdirectory to build within the source distribution.
|
||||
subdirectory: Option<PathBuf>,
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use std::path::PathBuf;
|
||||
use tokio::task::JoinError;
|
||||
use zip::result::ZipError;
|
||||
|
||||
|
|
@ -56,6 +57,8 @@ pub enum Error {
|
|||
DirWithoutEntrypoint,
|
||||
#[error("Failed to extract source distribution")]
|
||||
Extract(#[from] uv_extract::Error),
|
||||
#[error("Source distribution not found at: {0}")]
|
||||
NotFound(PathBuf),
|
||||
|
||||
/// Should not occur; only seen when another task panicked.
|
||||
#[error("The task executor is broken, did some other task panic?")]
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use anyhow::Result;
|
|||
use fs_err::tokio as fs;
|
||||
use futures::{FutureExt, TryStreamExt};
|
||||
use reqwest::Response;
|
||||
use tempfile::TempDir;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::{debug, info_span, instrument, Instrument};
|
||||
use url::Url;
|
||||
|
|
@ -113,6 +114,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
||||
}
|
||||
FileLocation::Path(path) => {
|
||||
// Create a distribution to represent the local path.
|
||||
let path_source_dist = PathSourceDist {
|
||||
name: registry_source_dist.filename.name.clone(),
|
||||
url: VerbatimUrl::unknown(
|
||||
|
|
@ -121,7 +123,14 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
path: path.clone(),
|
||||
editable: false,
|
||||
};
|
||||
return self.path(source_dist, &path_source_dist).boxed().await;
|
||||
|
||||
// If necessary, extract the archive.
|
||||
let extracted = self.extract_archive(&path_source_dist).await?;
|
||||
|
||||
return self
|
||||
.path(source_dist, &path_source_dist, extracted.path())
|
||||
.boxed()
|
||||
.await;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -147,7 +156,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
self.git(source_dist, git_source_dist).boxed().await?
|
||||
}
|
||||
SourceDist::Path(path_source_dist) => {
|
||||
self.path(source_dist, path_source_dist).boxed().await?
|
||||
// If necessary, extract the archive.
|
||||
let extracted = self.extract_archive(path_source_dist).await?;
|
||||
|
||||
self.path(source_dist, path_source_dist, extracted.path())
|
||||
.boxed()
|
||||
.await?
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -194,6 +208,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
|
||||
}
|
||||
FileLocation::Path(path) => {
|
||||
// Create a distribution to represent the local path.
|
||||
let path_source_dist = PathSourceDist {
|
||||
name: registry_source_dist.filename.name.clone(),
|
||||
url: VerbatimUrl::unknown(
|
||||
|
|
@ -202,8 +217,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
path: path.clone(),
|
||||
editable: false,
|
||||
};
|
||||
|
||||
// If necessary, extract the archive.
|
||||
let extracted = self.extract_archive(&path_source_dist).await?;
|
||||
|
||||
return self
|
||||
.path_metadata(source_dist, &path_source_dist)
|
||||
.path_metadata(source_dist, &path_source_dist, extracted.path())
|
||||
.boxed()
|
||||
.await;
|
||||
}
|
||||
|
|
@ -233,7 +252,10 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
.await?
|
||||
}
|
||||
SourceDist::Path(path_source_dist) => {
|
||||
self.path_metadata(source_dist, path_source_dist)
|
||||
// If necessary, extract the archive.
|
||||
let extracted = self.extract_archive(path_source_dist).await?;
|
||||
|
||||
self.path_metadata(source_dist, path_source_dist, extracted.path())
|
||||
.boxed()
|
||||
.await?
|
||||
}
|
||||
|
|
@ -468,6 +490,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
&self,
|
||||
source_dist: &SourceDist,
|
||||
path_source_dist: &PathSourceDist,
|
||||
source_root: &Path,
|
||||
) -> Result<BuiltWheelMetadata, Error> {
|
||||
let cache_shard = self.build_context.cache().shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
|
|
@ -510,7 +533,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
.map(|reporter| reporter.on_build_start(source_dist));
|
||||
|
||||
let (disk_filename, filename, metadata) = self
|
||||
.build_source_dist(source_dist, &path_source_dist.path, None, &cache_shard)
|
||||
.build_source_dist(source_dist, source_root, None, &cache_shard)
|
||||
.await?;
|
||||
|
||||
if let Some(task) = task {
|
||||
|
|
@ -540,6 +563,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
&self,
|
||||
source_dist: &SourceDist,
|
||||
path_source_dist: &PathSourceDist,
|
||||
source_root: &Path,
|
||||
) -> Result<Metadata21, Error> {
|
||||
let cache_shard = self.build_context.cache().shard(
|
||||
CacheBucket::BuiltWheels,
|
||||
|
|
@ -586,7 +610,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
|
||||
// If the backend supports `prepare_metadata_for_build_wheel`, use it.
|
||||
if let Some(metadata) = self
|
||||
.build_source_dist_metadata(source_dist, &path_source_dist.path, None)
|
||||
.build_source_dist_metadata(source_dist, source_root, None)
|
||||
.boxed()
|
||||
.await?
|
||||
{
|
||||
|
|
@ -609,7 +633,7 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
.map(|reporter| reporter.on_build_start(source_dist));
|
||||
|
||||
let (_disk_filename, _filename, metadata) = self
|
||||
.build_source_dist(source_dist, &path_source_dist.path, None, &cache_shard)
|
||||
.build_source_dist(source_dist, source_root, None, &cache_shard)
|
||||
.await?;
|
||||
|
||||
if let Some(task) = task {
|
||||
|
|
@ -834,6 +858,51 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
Ok((fetch, subdirectory))
|
||||
}
|
||||
|
||||
/// Extract a local source distribution, if it's stored as a `.tar.gz` or `.zip` archive.
|
||||
///
|
||||
/// TODO(charlie): Consider storing the extracted source in the cache, to avoid re-extracting
|
||||
/// on every invocation.
|
||||
async fn extract_archive(
|
||||
&self,
|
||||
source_dist: &'a PathSourceDist,
|
||||
) -> Result<ExtractedSource<'a>, Error> {
|
||||
// If necessary, unzip the source distribution.
|
||||
let path = source_dist.path.as_path();
|
||||
|
||||
let metadata = match fs::metadata(&path).await {
|
||||
Ok(metadata) => metadata,
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
|
||||
return Err(Error::NotFound(path.to_path_buf()));
|
||||
}
|
||||
Err(err) => return Err(Error::CacheRead(err)),
|
||||
};
|
||||
|
||||
if metadata.is_dir() {
|
||||
Ok(ExtractedSource::Directory(path))
|
||||
} else {
|
||||
debug!("Unpacking for build: {source_dist}");
|
||||
|
||||
let temp_dir = tempfile::tempdir_in(self.build_context.cache().root())
|
||||
.map_err(Error::CacheWrite)?;
|
||||
|
||||
// Unzip the archive into the temporary directory.
|
||||
let reader = fs_err::tokio::File::open(&path)
|
||||
.await
|
||||
.map_err(Error::CacheRead)?;
|
||||
uv_extract::stream::archive(tokio::io::BufReader::new(reader), path, &temp_dir.path())
|
||||
.await?;
|
||||
|
||||
// Extract the top-level directory from the archive.
|
||||
let extracted = match uv_extract::strip_component(temp_dir.path()) {
|
||||
Ok(top_level) => top_level,
|
||||
Err(uv_extract::Error::NonSingularArchive(_)) => temp_dir.path().to_path_buf(),
|
||||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
|
||||
Ok(ExtractedSource::Archive(extracted, temp_dir))
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a source distribution, storing the built wheel in the cache.
|
||||
///
|
||||
/// Returns the un-normalized disk filename, the parsed, normalized filename and the metadata
|
||||
|
|
@ -950,6 +1019,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
) -> Result<(Dist, String, WheelFilename, Metadata21), Error> {
|
||||
debug!("Building (editable) {editable}");
|
||||
|
||||
// Verify that the editable exists.
|
||||
if !editable.path.exists() {
|
||||
return Err(Error::NotFound(editable.path.clone()));
|
||||
}
|
||||
|
||||
// Build the wheel.
|
||||
let disk_filename = self
|
||||
.build_context
|
||||
|
|
@ -980,6 +1054,26 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ExtractedSource<'a> {
|
||||
/// The source distribution was passed in as a directory, and so doesn't need to be extracted.
|
||||
Directory(&'a Path),
|
||||
/// The source distribution was passed in as an archive, and was extracted into a temporary
|
||||
/// directory.
|
||||
#[allow(dead_code)]
|
||||
Archive(PathBuf, TempDir),
|
||||
}
|
||||
|
||||
impl ExtractedSource<'_> {
|
||||
/// Return the [`Path`] to the extracted source root.
|
||||
fn path(&self) -> &Path {
|
||||
match self {
|
||||
ExtractedSource::Directory(path) => path,
|
||||
ExtractedSource::Archive(path, _) => path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Read an existing HTTP-cached [`Manifest`], if it exists.
|
||||
pub(crate) fn read_http_manifest(cache_entry: &CacheEntry) -> Result<Option<Manifest>, Error> {
|
||||
match fs_err::File::open(cache_entry.path()) {
|
||||
|
|
|
|||
|
|
@ -3283,7 +3283,6 @@ fn missing_editable_requirement() -> Result<()> {
|
|||
|
||||
----- stderr -----
|
||||
error: Failed to build editables
|
||||
Caused by: Failed to build editable: file://[TEMP_DIR]/django-3.2.8.tar.gz
|
||||
Caused by: Source distribution not found at: /[TEMP_DIR]/django-3.2.8.tar.gz
|
||||
"###);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue