Sanitize filenames during zip extraction (#8732)

## Summary

Based on the file-extraction example in `async-zip`
(`examples/file_extraction.rs`, line 33, at commit `527bda9d58`).

Closes: https://github.com/astral-sh/uv/issues/8731.

## Test Plan

Created https://github.com/astral-sh/sanitize-wheel-test.
This commit is contained in:
Charlie Marsh 2024-10-31 15:12:51 -04:00 committed by GitHub
parent 8d3408fe39
commit f3264583ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 71 additions and 3 deletions

View file

@ -1,13 +1,15 @@
use std::path::Path;
use std::path::{Component, Path, PathBuf};
use std::pin::Pin;
use crate::Error;
use futures::StreamExt;
use rustc_hash::FxHashSet;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::warn;
use uv_distribution_filename::SourceDistExtension;
use crate::Error;
const DEFAULT_BUF_SIZE: usize = 128 * 1024;
/// Unpack a `.zip` archive into the target directory, without requiring `Seek`.
@ -19,6 +21,26 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
reader: R,
target: impl AsRef<Path>,
) -> Result<(), Error> {
/// Validate an archive entry name and convert it into a [`PathBuf`].
///
/// Returns `None` when the name contains a NUL byte, carries a path prefix or
/// a root component, or uses `..` to climb above its starting directory at
/// any point. Otherwise the name is returned as a path exactly as given (it
/// is checked, not normalized).
///
/// See: <https://docs.rs/zip/latest/zip/read/struct.ZipFile.html#method.enclosed_name>
pub(crate) fn enclosed_name(file_name: &str) -> Option<PathBuf> {
    // An embedded NUL byte disqualifies the name outright.
    if file_name.contains('\0') {
        return None;
    }

    let candidate = PathBuf::from(file_name);

    // Track how many directories deep we are relative to the start; the fold
    // short-circuits with `None` as soon as the path would escape.
    candidate
        .components()
        .try_fold(0usize, |level, part| match part {
            Component::Prefix(_) | Component::RootDir => None,
            Component::ParentDir => level.checked_sub(1),
            Component::Normal(_) => Some(level + 1),
            Component::CurDir => Some(level),
        })?;

    Some(candidate)
}
let target = target.as_ref();
let mut reader = futures::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader.compat());
let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader);
@ -28,6 +50,16 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
while let Some(mut entry) = zip.next_with_entry().await? {
// Construct the (expected) path to the file on-disk.
let path = entry.reader().entry().filename().as_str()?;
// Sanitize the file name to prevent directory traversal attacks.
let Some(path) = enclosed_name(path) else {
warn!("Skipping unsafe file name: {path}");
// Close current file prior to proceeding, as per:
// https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
zip = entry.skip().await?;
continue;
};
let path = target.join(path);
let is_dir = entry.reader().entry().dir()?;
@ -55,7 +87,7 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
tokio::io::copy(&mut reader, &mut writer).await?;
}
// Close current file to get access to the next one. See docs:
// Close current file prior to proceeding, as per:
// https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
zip = entry.skip().await?;
}
@ -84,6 +116,9 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
if has_any_executable_bit != 0 {
// Construct the (expected) path to the file on-disk.
let path = entry.filename().as_str()?;
let Some(path) = enclosed_name(path) else {
continue;
};
let path = target.join(path);
let permissions = fs_err::tokio::metadata(&path).await?.permissions();

View file

@ -3,6 +3,7 @@ use std::sync::Mutex;
use rayon::prelude::*;
use rustc_hash::FxHashSet;
use tracing::warn;
use zip::ZipArchive;
use crate::vendor::{CloneableSeekableReader, HasLength};
@ -25,6 +26,7 @@ pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
// Determine the path of the file within the wheel.
let Some(enclosed_name) = file.enclosed_name() else {
warn!("Skipping unsafe file name: {}", file.name());
return Ok(());
};