Avoid erroring for source distributions with symlinks in archive (#1944)

## Summary

For context, we have three extraction paths:

- untar (async) - used for any `.tar.gz`, local or remote.
- unzip (async) - used to unzip remote wheels, or local or remote source
distributions.
- unzip (sync) - used to untar locally-available wheels into the cache.

We use three different crates for these:

- [`tokio-tar`](https://github.com/vorot93/tokio-tar)
- [`async-zip`](https://github.com/Majored/rs-async-zip)
- [`zip-rs`](https://github.com/zip-rs/zip)

These all seem to have different support for symlinks:

- `tokio-tar` tries to create a symlink (which works fine on Unix but
errors on Windows, since we typically don't have elevated permissions).
- `async-zip` _seems_ to write the target contents directly to the file
(which is what we want).
- `zip-rs` _apparently_ writes the _name_ of the target to the file
(which isn't what we want).

Thankfully, symlinks are not allowed in wheels
(https://github.com/pypa/pip/issues/5919,
https://discuss.python.org/t/symbolic-links-in-wheels/1945), so we can
ignore `zip-rs`.

For `tokio-tar`, we now _skip_ (and warn) if we see a symlink on
Windows. We could do what pip does, and recursively copy, but it's
difficult because we don't have `Seek` on the file. (Alternatively, we
could use hard links and junctions, though those also might need to
exist already.) Let's see how far this gets us.

(We also no longer attempt to set permissions on symlinks on Unix, which
caused another failure.)

Closes https://github.com/astral-sh/uv/issues/1858.
This commit is contained in:
Charlie Marsh 2024-02-23 22:22:13 -05:00 committed by GitHub
parent 019e2fd1b5
commit a1f50418fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 78 additions and 12 deletions

View file

@ -24,4 +24,5 @@ thiserror = { workspace = true }
tokio = { workspace = true, features = ["io-util"] }
tokio-tar = { workspace = true }
tokio-util = { workspace = true, features = ["compat"] }
tracing = { workspace = true }
zip = { workspace = true }

View file

@ -4,6 +4,7 @@ use std::pin::Pin;
use futures::StreamExt;
use rustc_hash::FxHashSet;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::warn;
use crate::Error;
@ -41,7 +42,7 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
}
// We don't know the file permissions here, because we haven't seen the central directory yet.
let file = fs_err::tokio::File::create(path).await?;
let file = fs_err::tokio::File::create(&path).await?;
let mut writer =
if let Ok(size) = usize::try_from(entry.reader().entry().uncompressed_size()) {
tokio::io::BufWriter::with_capacity(size, file)
@ -111,6 +112,19 @@ async fn untar_in<R: tokio::io::AsyncRead + Unpin, P: AsRef<Path>>(
while let Some(entry) = pinned.next().await {
// Unpack the file into the destination directory.
let mut file = entry?;
// On Windows, skip symlink entries, as they're not supported. pip recursively copies the
// symlink target instead.
if cfg!(windows) {
if file.header().entry_type().is_symlink() {
warn!(
"Skipping symlink in tar archive: {}",
file.path()?.display()
);
continue;
}
}
file.unpack_in(dst.as_ref()).await?;
// Preserve the executable bit.
@ -119,17 +133,19 @@ async fn untar_in<R: tokio::io::AsyncRead + Unpin, P: AsRef<Path>>(
use std::fs::Permissions;
use std::os::unix::fs::PermissionsExt;
let mode = file.header().mode()?;
let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 {
if let Some(path) = crate::tar::unpacked_at(dst.as_ref(), &file.path()?) {
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
fs_err::tokio::set_permissions(
&path,
Permissions::from_mode(permissions.mode() | 0o111),
)
.await?;
let entry_type = file.header().entry_type();
if entry_type.is_file() || entry_type.is_hard_link() {
let mode = file.header().mode()?;
let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 {
if let Some(path) = crate::tar::unpacked_at(dst.as_ref(), &file.path()?) {
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
fs_err::tokio::set_permissions(
&path,
Permissions::from_mode(permissions.mode() | 0o111),
)
.await?;
}
}
}
}

View file

@ -49,6 +49,19 @@ fn command(context: &TestContext) -> Command {
command
}
/// Create a `pip uninstall` command with options shared across scenarios.
fn uninstall_command(context: &TestContext) -> Command {
let mut command = Command::new(get_bin());
command
.arg("pip")
.arg("uninstall")
.arg("--cache-dir")
.arg(context.cache_dir.path())
.env("VIRTUAL_ENV", context.venv.as_os_str())
.current_dir(&context.temp_dir);
command
}
#[test]
fn missing_requirements_txt() {
let context = TestContext::new("3.12");
@ -1770,3 +1783,38 @@ fn reinstall_duplicate() -> Result<()> {
Ok(())
}
/// Install a package that contains a symlink within the archive.
#[test]
fn install_symlink() {
let context = TestContext::new("3.12");
uv_snapshot!(command(&context)
.arg("pgpdump==1.5")
.arg("--strict"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Installed 1 package in [TIME]
+ pgpdump==1.5
"###
);
context.assert_command("import pgpdump").success();
uv_snapshot!(uninstall_command(&context)
.arg("pgpdump"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Uninstalled 1 package in [TIME]
- pgpdump==1.5
"###
);
}