Add hint for venv in source distribution error (#15202)

Venvs should not be in source distributions, and on Unix, we now reject
them for having a link outside the source directory. This PR adds a hint
for that since users were confused (#15096).

In the process, we're differentiating IO errors for format error for
uncompression generally.

Fixes #15096
This commit is contained in:
konsti 2025-08-19 00:07:57 +02:00 committed by GitHub
parent 724e4c7e5e
commit 4f4492dd53
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 167 additions and 34 deletions

5
Cargo.lock generated
View file

@ -184,9 +184,8 @@ dependencies = [
[[package]]
name = "astral-tokio-tar"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1abb2bfba199d9ec4759b797115ba6ae435bdd920ce99783bb53aeff57ba919b"
version = "0.5.3"
source = "git+https://github.com/astral-sh/tokio-tar?rev=f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688#f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688"
dependencies = [
"filetime",
"futures-core",

View file

@ -77,7 +77,7 @@ anstream = { version = "0.6.15" }
anyhow = { version = "1.0.89" }
arcstr = { version = "1.2.0" }
arrayvec = { version = "0.7.6" }
astral-tokio-tar = { version = "0.5.2" }
astral-tokio-tar = { git = "https://github.com/astral-sh/tokio-tar", rev = "f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688" }
async-channel = { version = "2.3.1" }
async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
async-trait = { version = "0.1.82" }

View file

@ -2,12 +2,14 @@ use std::{ffi::OsString, path::PathBuf};
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Failed to read from zip file")]
Zip(#[from] zip::result::ZipError),
#[error("Failed to read from zip file")]
AsyncZip(#[from] async_zip::error::ZipError),
#[error("I/O operation failed during extraction")]
Io(#[from] std::io::Error),
Io(std::io::Error),
#[error("Invalid zip file")]
Zip(#[from] zip::result::ZipError),
#[error("Invalid zip file structure")]
AsyncZip(#[from] async_zip::error::ZipError),
#[error("Invalid tar file")]
Tar(#[from] tokio_tar::TarError),
#[error(
"The top-level of the archive must only contain a list directory, but it contains: {0:?}"
)]
@ -90,6 +92,31 @@ pub enum Error {
}
impl Error {
/// When reading from an archive, the error can either be an IO error from the underlying
/// operating system, or an error with the archive. Both get wrapper into an IO error through
/// e.g., `io::copy`. This method extracts zip and tar errors, to distinguish them from invalid
/// archives.
pub(crate) fn io_or_compression(err: std::io::Error) -> Self {
if err.kind() != std::io::ErrorKind::Other {
return Self::Io(err);
}
let err = match err.downcast::<tokio_tar::TarError>() {
Ok(tar_err) => return Self::Tar(tar_err),
Err(err) => err,
};
let err = match err.downcast::<async_zip::error::ZipError>() {
Ok(zip_err) => return Self::AsyncZip(zip_err),
Err(err) => err,
};
let err = match err.downcast::<zip::result::ZipError>() {
Ok(zip_err) => return Self::Zip(zip_err),
Err(err) => err,
};
Self::Io(err)
}
/// Returns `true` if the error is due to the server not supporting HTTP streaming. Most
/// commonly, this is due to serving ZIP files with features that are incompatible with
/// streaming, like data descriptors.

View file

@ -126,7 +126,9 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
let is_dir = entry.reader().entry().dir()?;
let computed = if is_dir {
if directories.insert(path.clone()) {
fs_err::tokio::create_dir_all(path).await?;
fs_err::tokio::create_dir_all(path)
.await
.map_err(Error::Io)?;
}
// If this is a directory, we expect the CRC32 to be 0.
@ -159,7 +161,9 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
} else {
if let Some(parent) = path.parent() {
if directories.insert(parent.to_path_buf()) {
fs_err::tokio::create_dir_all(parent).await?;
fs_err::tokio::create_dir_all(parent)
.await
.map_err(Error::Io)?;
}
}
@ -176,7 +180,9 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
tokio::io::BufWriter::new(file)
};
let mut reader = entry.reader_mut().compat();
let bytes_read = tokio::io::copy(&mut reader, &mut writer).await?;
let bytes_read = tokio::io::copy(&mut reader, &mut writer)
.await
.map_err(Error::io_or_compression)?;
let reader = reader.into_inner();
(bytes_read, reader)
@ -188,12 +194,15 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
);
// Read the existing file into memory.
let existing_contents = fs_err::tokio::read(&path).await?;
let existing_contents = fs_err::tokio::read(&path).await.map_err(Error::Io)?;
// Read the entry into memory.
let mut expected_contents = Vec::with_capacity(existing_contents.len());
let entry_reader = entry.reader_mut();
let bytes_read = entry_reader.read_to_end(&mut expected_contents).await?;
let bytes_read = entry_reader
.read_to_end(&mut expected_contents)
.await
.map_err(Error::io_or_compression)?;
// Verify that the existing file contents match the expected contents.
if existing_contents != expected_contents {
@ -204,7 +213,7 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
(bytes_read as u64, entry_reader)
}
Err(err) => return Err(err.into()),
Err(err) => return Err(Error::Io(err)),
};
// Validate the uncompressed size.
@ -458,13 +467,17 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 {
let path = target.join(relpath);
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
let permissions = fs_err::tokio::metadata(&path)
.await
.map_err(Error::Io)?
.permissions();
if permissions.mode() & 0o111 != 0o111 {
fs_err::tokio::set_permissions(
&path,
Permissions::from_mode(permissions.mode() | 0o111),
)
.await?;
.await
.map_err(Error::Io)?;
}
}
}
@ -522,10 +535,10 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
// Determine whether the reader is exhausted.
if !skip_validation {
let mut buffer = [0; 1];
if reader.read(&mut buffer).await? > 0 {
if reader.read(&mut buffer).await.map_err(Error::Io)? > 0 {
// If the buffer contains a single null byte, ignore it.
if buffer[0] == 0 {
if reader.read(&mut buffer).await? > 0 {
if reader.read(&mut buffer).await.map_err(Error::Io)? > 0 {
return Err(Error::TrailingContents);
}
@ -618,7 +631,9 @@ pub async fn untar_gz<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false)
.set_allow_external_symlinks(false)
.build();
Ok(untar_in(archive, target.as_ref()).await?)
untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)
}
/// Unpack a `.tar.bz2` archive into the target directory, without requiring `Seek`.
@ -638,7 +653,9 @@ pub async fn untar_bz2<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false)
.set_allow_external_symlinks(false)
.build();
Ok(untar_in(archive, target.as_ref()).await?)
untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)
}
/// Unpack a `.tar.zst` archive into the target directory, without requiring `Seek`.
@ -658,7 +675,9 @@ pub async fn untar_zst<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false)
.set_allow_external_symlinks(false)
.build();
Ok(untar_in(archive, target.as_ref()).await?)
untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)
}
/// Unpack a `.tar.xz` archive into the target directory, without requiring `Seek`.
@ -678,7 +697,9 @@ pub async fn untar_xz<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false)
.set_allow_external_symlinks(false)
.build();
untar_in(archive, target.as_ref()).await?;
untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)?;
Ok(())
}
@ -697,7 +718,9 @@ pub async fn untar<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false)
.set_allow_external_symlinks(false)
.build();
untar_in(archive, target.as_ref()).await?;
untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)?;
Ok(())
}

View file

@ -37,7 +37,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
if file.is_dir() {
let mut directories = directories.lock().unwrap();
if directories.insert(path.clone()) {
fs_err::create_dir_all(path)?;
fs_err::create_dir_all(path).map_err(Error::Io)?;
}
return Ok(());
}
@ -45,12 +45,12 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
if let Some(parent) = path.parent() {
let mut directories = directories.lock().unwrap();
if directories.insert(parent.to_path_buf()) {
fs_err::create_dir_all(parent)?;
fs_err::create_dir_all(parent).map_err(Error::Io)?;
}
}
// Copy the file contents.
let outfile = fs_err::File::create(&path)?;
let outfile = fs_err::File::create(&path).map_err(Error::Io)?;
let size = file.size();
if size > 0 {
let mut writer = if let Ok(size) = usize::try_from(size) {
@ -58,7 +58,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
} else {
std::io::BufWriter::new(outfile)
};
std::io::copy(&mut file, &mut writer)?;
std::io::copy(&mut file, &mut writer).map_err(Error::io_or_compression)?;
}
// See `uv_extract::stream::unzip`. For simplicity, this is identical with the code there except for being
@ -72,12 +72,13 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
// https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 {
let permissions = fs_err::metadata(&path)?.permissions();
let permissions = fs_err::metadata(&path).map_err(Error::Io)?.permissions();
if permissions.mode() & 0o111 != 0o111 {
fs_err::set_permissions(
&path,
Permissions::from_mode(permissions.mode() | 0o111),
)?;
)
.map_err(Error::Io)?;
}
}
}
@ -97,8 +98,10 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
/// This function returns the path to that top-level directory.
pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> {
// TODO(konstin): Verify the name of the directory.
let top_level =
fs_err::read_dir(source.as_ref())?.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()?;
let top_level = fs_err::read_dir(source.as_ref())
.map_err(Error::Io)?
.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()
.map_err(Error::Io)?;
match top_level.as_slice() {
[root] => Ok(root.path()),
[] => Err(Error::EmptyArchive),

View file

@ -390,11 +390,33 @@ async fn build_impl(
source: String,
#[source]
cause: anyhow::Error,
#[help]
help: Option<String>,
}
let help = if let Error::Extract(uv_extract::Error::Tar(err)) = &err {
// TODO(konsti): astral-tokio-tar should use a proper error instead of
// encoding everything in strings
if err.to_string().contains("/bin/python")
&& std::error::Error::source(err).is_some_and(|err| {
err.to_string().ends_with("outside of the target directory")
})
{
Some(
"This file seems to be part of a virtual environment. Virtual environments must be excluded from source distributions."
.to_string(),
)
} else {
None
}
} else {
None
};
let report = miette::Report::new(Diagnostic {
source: source.to_string(),
cause: err.into(),
help,
});
anstream::eprint!("{report:?}");

View file

@ -2025,3 +2025,62 @@ fn force_pep517() -> Result<()> {
Ok(())
}
/// Check that we show a hint when there's a venv in the source distribution.
///
/// <https://github.com/astral-sh/uv/issues/15096>
// Windows uses trampolines instead of symlinks. You don't want those in your source distribution
// either, but that's for the build backend to catch, we're only checking for the unix error hint
// in uv here.
#[cfg(unix)]
#[test]
fn venv_included_in_sdist() -> Result<()> {
let context = TestContext::new("3.12");
context
.init()
.arg("--name")
.arg("project")
.arg("--build-backend")
.arg("hatchling")
.assert()
.success();
let pyproject_toml = indoc! {r#"
[project]
name = "project"
version = "0.1.0"
requires-python = ">=3.12.0"
[tool.hatch.build.targets.sdist.force-include]
".venv" = ".venv"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
"#};
context
.temp_dir
.child("pyproject.toml")
.write_str(pyproject_toml)?;
context.venv().assert().success();
// context.filters()
uv_snapshot!(context.filters(), context.build(), @r"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
Building source distribution...
× Failed to build `[TEMP_DIR]/`
Invalid tar file
failed to unpack `[CACHE_DIR]/sdists-v9/[TMP]/python`
symlink destination for [PYTHON-3.12] is outside of the target directory
help: This file seems to be part of a virtual environment. Virtual environments must be excluded from source distributions.
");
Ok(())
}

View file

@ -22,7 +22,7 @@ async fn unzip(url: &str) -> anyhow::Result<(), uv_extract::Error> {
.map_err(std::io::Error::other)
.into_async_read();
let target = tempfile::TempDir::new()?;
let target = tempfile::TempDir::new().map_err(uv_extract::Error::Io)?;
uv_extract::stream::unzip(reader.compat(), target.path()).await
}

View file

@ -12108,7 +12108,7 @@ fn reject_invalid_central_directory_offset() {
Resolved 1 package in [TIME]
× Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip1/attrs-25.3.0-py3-none-any.whl`
Failed to extract archive: attrs-25.3.0-py3-none-any.whl
Failed to read from zip file
Invalid zip file structure
the end of central directory offset (0xf0d9) did not match the actual offset (0xf9ac)
"
);