Add hint for venv in source distribution error (#15202)

Venvs should not be in source distributions, and on Unix, we now reject
them for having a link outside the source directory. This PR adds a hint
for that since users were confused (#15096).

In the process, we're differentiating I/O errors from archive format errors during
decompression generally.

Fixes #15096
This commit is contained in:
konsti 2025-08-19 00:07:57 +02:00 committed by GitHub
parent 724e4c7e5e
commit 4f4492dd53
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 167 additions and 34 deletions

5
Cargo.lock generated
View file

@ -184,9 +184,8 @@ dependencies = [
[[package]] [[package]]
name = "astral-tokio-tar" name = "astral-tokio-tar"
version = "0.5.2" version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/astral-sh/tokio-tar?rev=f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688#f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688"
checksum = "1abb2bfba199d9ec4759b797115ba6ae435bdd920ce99783bb53aeff57ba919b"
dependencies = [ dependencies = [
"filetime", "filetime",
"futures-core", "futures-core",

View file

@ -77,7 +77,7 @@ anstream = { version = "0.6.15" }
anyhow = { version = "1.0.89" } anyhow = { version = "1.0.89" }
arcstr = { version = "1.2.0" } arcstr = { version = "1.2.0" }
arrayvec = { version = "0.7.6" } arrayvec = { version = "0.7.6" }
astral-tokio-tar = { version = "0.5.2" } astral-tokio-tar = { git = "https://github.com/astral-sh/tokio-tar", rev = "f1488188f1d1b54a73eb0c42a8b8f4b9ee87d688" }
async-channel = { version = "2.3.1" } async-channel = { version = "2.3.1" }
async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] } async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
async-trait = { version = "0.1.82" } async-trait = { version = "0.1.82" }

View file

@ -2,12 +2,14 @@ use std::{ffi::OsString, path::PathBuf};
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum Error { pub enum Error {
#[error("Failed to read from zip file")]
Zip(#[from] zip::result::ZipError),
#[error("Failed to read from zip file")]
AsyncZip(#[from] async_zip::error::ZipError),
#[error("I/O operation failed during extraction")] #[error("I/O operation failed during extraction")]
Io(#[from] std::io::Error), Io(std::io::Error),
#[error("Invalid zip file")]
Zip(#[from] zip::result::ZipError),
#[error("Invalid zip file structure")]
AsyncZip(#[from] async_zip::error::ZipError),
#[error("Invalid tar file")]
Tar(#[from] tokio_tar::TarError),
#[error( #[error(
"The top-level of the archive must only contain a list directory, but it contains: {0:?}" "The top-level of the archive must only contain a list directory, but it contains: {0:?}"
)] )]
@ -90,6 +92,31 @@ pub enum Error {
} }
impl Error { impl Error {
/// When reading from an archive, the error can either be an IO error from the underlying
/// operating system, or an error with the archive. Both get wrapper into an IO error through
/// e.g., `io::copy`. This method extracts zip and tar errors, to distinguish them from invalid
/// archives.
pub(crate) fn io_or_compression(err: std::io::Error) -> Self {
if err.kind() != std::io::ErrorKind::Other {
return Self::Io(err);
}
let err = match err.downcast::<tokio_tar::TarError>() {
Ok(tar_err) => return Self::Tar(tar_err),
Err(err) => err,
};
let err = match err.downcast::<async_zip::error::ZipError>() {
Ok(zip_err) => return Self::AsyncZip(zip_err),
Err(err) => err,
};
let err = match err.downcast::<zip::result::ZipError>() {
Ok(zip_err) => return Self::Zip(zip_err),
Err(err) => err,
};
Self::Io(err)
}
/// Returns `true` if the error is due to the server not supporting HTTP streaming. Most /// Returns `true` if the error is due to the server not supporting HTTP streaming. Most
/// commonly, this is due to serving ZIP files with features that are incompatible with /// commonly, this is due to serving ZIP files with features that are incompatible with
/// streaming, like data descriptors. /// streaming, like data descriptors.

View file

@ -126,7 +126,9 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
let is_dir = entry.reader().entry().dir()?; let is_dir = entry.reader().entry().dir()?;
let computed = if is_dir { let computed = if is_dir {
if directories.insert(path.clone()) { if directories.insert(path.clone()) {
fs_err::tokio::create_dir_all(path).await?; fs_err::tokio::create_dir_all(path)
.await
.map_err(Error::Io)?;
} }
// If this is a directory, we expect the CRC32 to be 0. // If this is a directory, we expect the CRC32 to be 0.
@ -159,7 +161,9 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
} else { } else {
if let Some(parent) = path.parent() { if let Some(parent) = path.parent() {
if directories.insert(parent.to_path_buf()) { if directories.insert(parent.to_path_buf()) {
fs_err::tokio::create_dir_all(parent).await?; fs_err::tokio::create_dir_all(parent)
.await
.map_err(Error::Io)?;
} }
} }
@ -176,7 +180,9 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
tokio::io::BufWriter::new(file) tokio::io::BufWriter::new(file)
}; };
let mut reader = entry.reader_mut().compat(); let mut reader = entry.reader_mut().compat();
let bytes_read = tokio::io::copy(&mut reader, &mut writer).await?; let bytes_read = tokio::io::copy(&mut reader, &mut writer)
.await
.map_err(Error::io_or_compression)?;
let reader = reader.into_inner(); let reader = reader.into_inner();
(bytes_read, reader) (bytes_read, reader)
@ -188,12 +194,15 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
); );
// Read the existing file into memory. // Read the existing file into memory.
let existing_contents = fs_err::tokio::read(&path).await?; let existing_contents = fs_err::tokio::read(&path).await.map_err(Error::Io)?;
// Read the entry into memory. // Read the entry into memory.
let mut expected_contents = Vec::with_capacity(existing_contents.len()); let mut expected_contents = Vec::with_capacity(existing_contents.len());
let entry_reader = entry.reader_mut(); let entry_reader = entry.reader_mut();
let bytes_read = entry_reader.read_to_end(&mut expected_contents).await?; let bytes_read = entry_reader
.read_to_end(&mut expected_contents)
.await
.map_err(Error::io_or_compression)?;
// Verify that the existing file contents match the expected contents. // Verify that the existing file contents match the expected contents.
if existing_contents != expected_contents { if existing_contents != expected_contents {
@ -204,7 +213,7 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
(bytes_read as u64, entry_reader) (bytes_read as u64, entry_reader)
} }
Err(err) => return Err(err.into()), Err(err) => return Err(Error::Io(err)),
}; };
// Validate the uncompressed size. // Validate the uncompressed size.
@ -458,13 +467,17 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
let has_any_executable_bit = mode & 0o111; let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 { if has_any_executable_bit != 0 {
let path = target.join(relpath); let path = target.join(relpath);
let permissions = fs_err::tokio::metadata(&path).await?.permissions(); let permissions = fs_err::tokio::metadata(&path)
.await
.map_err(Error::Io)?
.permissions();
if permissions.mode() & 0o111 != 0o111 { if permissions.mode() & 0o111 != 0o111 {
fs_err::tokio::set_permissions( fs_err::tokio::set_permissions(
&path, &path,
Permissions::from_mode(permissions.mode() | 0o111), Permissions::from_mode(permissions.mode() | 0o111),
) )
.await?; .await
.map_err(Error::Io)?;
} }
} }
} }
@ -522,10 +535,10 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
// Determine whether the reader is exhausted. // Determine whether the reader is exhausted.
if !skip_validation { if !skip_validation {
let mut buffer = [0; 1]; let mut buffer = [0; 1];
if reader.read(&mut buffer).await? > 0 { if reader.read(&mut buffer).await.map_err(Error::Io)? > 0 {
// If the buffer contains a single null byte, ignore it. // If the buffer contains a single null byte, ignore it.
if buffer[0] == 0 { if buffer[0] == 0 {
if reader.read(&mut buffer).await? > 0 { if reader.read(&mut buffer).await.map_err(Error::Io)? > 0 {
return Err(Error::TrailingContents); return Err(Error::TrailingContents);
} }
@ -618,7 +631,9 @@ pub async fn untar_gz<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false) .set_preserve_permissions(false)
.set_allow_external_symlinks(false) .set_allow_external_symlinks(false)
.build(); .build();
Ok(untar_in(archive, target.as_ref()).await?) untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)
} }
/// Unpack a `.tar.bz2` archive into the target directory, without requiring `Seek`. /// Unpack a `.tar.bz2` archive into the target directory, without requiring `Seek`.
@ -638,7 +653,9 @@ pub async fn untar_bz2<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false) .set_preserve_permissions(false)
.set_allow_external_symlinks(false) .set_allow_external_symlinks(false)
.build(); .build();
Ok(untar_in(archive, target.as_ref()).await?) untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)
} }
/// Unpack a `.tar.zst` archive into the target directory, without requiring `Seek`. /// Unpack a `.tar.zst` archive into the target directory, without requiring `Seek`.
@ -658,7 +675,9 @@ pub async fn untar_zst<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false) .set_preserve_permissions(false)
.set_allow_external_symlinks(false) .set_allow_external_symlinks(false)
.build(); .build();
Ok(untar_in(archive, target.as_ref()).await?) untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)
} }
/// Unpack a `.tar.xz` archive into the target directory, without requiring `Seek`. /// Unpack a `.tar.xz` archive into the target directory, without requiring `Seek`.
@ -678,7 +697,9 @@ pub async fn untar_xz<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false) .set_preserve_permissions(false)
.set_allow_external_symlinks(false) .set_allow_external_symlinks(false)
.build(); .build();
untar_in(archive, target.as_ref()).await?; untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)?;
Ok(()) Ok(())
} }
@ -697,7 +718,9 @@ pub async fn untar<R: tokio::io::AsyncRead + Unpin>(
.set_preserve_permissions(false) .set_preserve_permissions(false)
.set_allow_external_symlinks(false) .set_allow_external_symlinks(false)
.build(); .build();
untar_in(archive, target.as_ref()).await?; untar_in(archive, target.as_ref())
.await
.map_err(Error::io_or_compression)?;
Ok(()) Ok(())
} }

View file

@ -37,7 +37,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
if file.is_dir() { if file.is_dir() {
let mut directories = directories.lock().unwrap(); let mut directories = directories.lock().unwrap();
if directories.insert(path.clone()) { if directories.insert(path.clone()) {
fs_err::create_dir_all(path)?; fs_err::create_dir_all(path).map_err(Error::Io)?;
} }
return Ok(()); return Ok(());
} }
@ -45,12 +45,12 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
if let Some(parent) = path.parent() { if let Some(parent) = path.parent() {
let mut directories = directories.lock().unwrap(); let mut directories = directories.lock().unwrap();
if directories.insert(parent.to_path_buf()) { if directories.insert(parent.to_path_buf()) {
fs_err::create_dir_all(parent)?; fs_err::create_dir_all(parent).map_err(Error::Io)?;
} }
} }
// Copy the file contents. // Copy the file contents.
let outfile = fs_err::File::create(&path)?; let outfile = fs_err::File::create(&path).map_err(Error::Io)?;
let size = file.size(); let size = file.size();
if size > 0 { if size > 0 {
let mut writer = if let Ok(size) = usize::try_from(size) { let mut writer = if let Ok(size) = usize::try_from(size) {
@ -58,7 +58,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
} else { } else {
std::io::BufWriter::new(outfile) std::io::BufWriter::new(outfile)
}; };
std::io::copy(&mut file, &mut writer)?; std::io::copy(&mut file, &mut writer).map_err(Error::io_or_compression)?;
} }
// See `uv_extract::stream::unzip`. For simplicity, this is identical with the code there except for being // See `uv_extract::stream::unzip`. For simplicity, this is identical with the code there except for being
@ -72,12 +72,13 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
// https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100 // https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
let has_any_executable_bit = mode & 0o111; let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 { if has_any_executable_bit != 0 {
let permissions = fs_err::metadata(&path)?.permissions(); let permissions = fs_err::metadata(&path).map_err(Error::Io)?.permissions();
if permissions.mode() & 0o111 != 0o111 { if permissions.mode() & 0o111 != 0o111 {
fs_err::set_permissions( fs_err::set_permissions(
&path, &path,
Permissions::from_mode(permissions.mode() | 0o111), Permissions::from_mode(permissions.mode() | 0o111),
)?; )
.map_err(Error::Io)?;
} }
} }
} }
@ -97,8 +98,10 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
/// This function returns the path to that top-level directory. /// This function returns the path to that top-level directory.
pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> { pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> {
// TODO(konstin): Verify the name of the directory. // TODO(konstin): Verify the name of the directory.
let top_level = let top_level = fs_err::read_dir(source.as_ref())
fs_err::read_dir(source.as_ref())?.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()?; .map_err(Error::Io)?
.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()
.map_err(Error::Io)?;
match top_level.as_slice() { match top_level.as_slice() {
[root] => Ok(root.path()), [root] => Ok(root.path()),
[] => Err(Error::EmptyArchive), [] => Err(Error::EmptyArchive),

View file

@ -390,11 +390,33 @@ async fn build_impl(
source: String, source: String,
#[source] #[source]
cause: anyhow::Error, cause: anyhow::Error,
#[help]
help: Option<String>,
} }
let help = if let Error::Extract(uv_extract::Error::Tar(err)) = &err {
// TODO(konsti): astral-tokio-tar should use a proper error instead of
// encoding everything in strings
if err.to_string().contains("/bin/python")
&& std::error::Error::source(err).is_some_and(|err| {
err.to_string().ends_with("outside of the target directory")
})
{
Some(
"This file seems to be part of a virtual environment. Virtual environments must be excluded from source distributions."
.to_string(),
)
} else {
None
}
} else {
None
};
let report = miette::Report::new(Diagnostic { let report = miette::Report::new(Diagnostic {
source: source.to_string(), source: source.to_string(),
cause: err.into(), cause: err.into(),
help,
}); });
anstream::eprint!("{report:?}"); anstream::eprint!("{report:?}");

View file

@ -2025,3 +2025,62 @@ fn force_pep517() -> Result<()> {
Ok(()) Ok(())
} }
/// Check that we show a hint when there's a venv in the source distribution.
///
/// <https://github.com/astral-sh/uv/issues/15096>
// Windows uses trampolines instead of symlinks. You don't want those in your source distribution
// either, but that's for the build backend to catch, we're only checking for the unix error hint
// in uv here.
#[cfg(unix)]
#[test]
fn venv_included_in_sdist() -> Result<()> {
let context = TestContext::new("3.12");
context
.init()
.arg("--name")
.arg("project")
.arg("--build-backend")
.arg("hatchling")
.assert()
.success();
let pyproject_toml = indoc! {r#"
[project]
name = "project"
version = "0.1.0"
requires-python = ">=3.12.0"
[tool.hatch.build.targets.sdist.force-include]
".venv" = ".venv"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
"#};
context
.temp_dir
.child("pyproject.toml")
.write_str(pyproject_toml)?;
context.venv().assert().success();
// context.filters()
uv_snapshot!(context.filters(), context.build(), @r"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
Building source distribution...
× Failed to build `[TEMP_DIR]/`
Invalid tar file
failed to unpack `[CACHE_DIR]/sdists-v9/[TMP]/python`
symlink destination for [PYTHON-3.12] is outside of the target directory
help: This file seems to be part of a virtual environment. Virtual environments must be excluded from source distributions.
");
Ok(())
}

View file

@ -22,7 +22,7 @@ async fn unzip(url: &str) -> anyhow::Result<(), uv_extract::Error> {
.map_err(std::io::Error::other) .map_err(std::io::Error::other)
.into_async_read(); .into_async_read();
let target = tempfile::TempDir::new()?; let target = tempfile::TempDir::new().map_err(uv_extract::Error::Io)?;
uv_extract::stream::unzip(reader.compat(), target.path()).await uv_extract::stream::unzip(reader.compat(), target.path()).await
} }

View file

@ -12108,7 +12108,7 @@ fn reject_invalid_central_directory_offset() {
Resolved 1 package in [TIME] Resolved 1 package in [TIME]
× Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip1/attrs-25.3.0-py3-none-any.whl` × Failed to download `attrs @ https://pub-c6f28d316acd406eae43501e51ad30fa.r2.dev/zip1/attrs-25.3.0-py3-none-any.whl`
Failed to extract archive: attrs-25.3.0-py3-none-any.whl Failed to extract archive: attrs-25.3.0-py3-none-any.whl
Failed to read from zip file Invalid zip file structure
the end of central directory offset (0xf0d9) did not match the actual offset (0xf9ac) the end of central directory offset (0xf0d9) did not match the actual offset (0xf9ac)
" "
); );