du: port to use the safe io functions

This commit is contained in:
Sylvestre Ledru 2025-08-23 21:22:51 +02:00
parent 82d9371ed3
commit c890a3479a
5 changed files with 848 additions and 26 deletions

View file

@ -21,7 +21,13 @@ path = "src/du.rs"
# For the --exclude & --exclude-from options
glob = { workspace = true }
clap = { workspace = true }
uucore = { workspace = true, features = ["format", "fsext", "parser", "time"] }
uucore = { workspace = true, features = [
"format",
"fsext",
"parser",
"time",
"safe-traversal",
] }
thiserror = { workspace = true }
fluent = { workspace = true }

View file

@ -2,6 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore fstatat openat dirfd
use clap::{Arg, ArgAction, ArgMatches, Command, builder::PossibleValue};
use glob::Pattern;
@ -25,6 +26,8 @@ use uucore::display::{Quotable, print_verbatim};
use uucore::error::{FromIo, UError, UResult, USimpleError, set_exit_code};
use uucore::fsext::{MetadataTimeField, metadata_get_time};
use uucore::line_ending::LineEnding;
#[cfg(target_os = "linux")]
use uucore::safe_traversal::DirFd;
use uucore::translate;
use uucore::parser::parse_glob;
@ -160,6 +163,44 @@ impl Stat {
metadata,
})
}
/// Build the `Stat` for a traversal root from an already-opened `DirFd`.
///
/// Size, block count and inode identity are taken from the descriptor's own
/// metadata. The `metadata` field still has to be a `std::fs::Metadata`, so it is
/// fetched separately with a path-based `symlink_metadata` call on `full_path`.
///
/// `_options` is accepted for signature parity with the other constructor and is
/// not read here.
///
/// # Errors
///
/// Returns the I/O error of whichever lookup fails first: the descriptor
/// metadata query, then the path-based `symlink_metadata`.
#[cfg(target_os = "linux")]
fn new_from_dirfd(
    dir_fd: &DirFd,
    full_path: &Path,
    _options: &TraversalOptions,
) -> std::io::Result<Self> {
    // Descriptor-based lookup first, so its error takes precedence.
    let fd_meta = dir_fd.metadata()?;

    // Path-based lookup, only needed to populate the `std::fs::Metadata` field.
    let path_meta = fs::symlink_metadata(full_path)?;

    let ids = fd_meta.file_info();
    let identity = FileInfo {
        file_id: ids.inode() as u128,
        dev_id: ids.device(),
    };

    // Directories contribute no size of their own.
    let own_size = if fd_meta.is_dir() { 0 } else { fd_meta.len() };

    Ok(Self {
        path: full_path.to_path_buf(),
        size: own_size,
        blocks: fd_meta.blocks(),
        inodes: 1,
        inode: Some(identity),
        metadata: path_meta,
    })
}
}
#[cfg(not(windows))]
@ -254,15 +295,301 @@ fn read_block_size(s: Option<&str>) -> UResult<u64> {
}
}
#[cfg(target_os = "linux")]
// For now, implement safe_du only on Linux
// This is done for Ubuntu but should be extended to other platforms that support openat
fn safe_du(
path: &Path,
options: &TraversalOptions,
depth: usize,
seen_inodes: &mut HashSet<FileInfo>,
print_tx: &mpsc::Sender<UResult<StatPrintInfo>>,
parent_fd: Option<&DirFd>,
) -> Result<Stat, Box<mpsc::SendError<UResult<StatPrintInfo>>>> {
// Get initial stat for this path - use DirFd if available to avoid path length issues
let mut my_stat = if let Some(parent_fd) = parent_fd {
// We have a parent fd, this is a subdirectory - use openat
let dir_name = path.file_name().unwrap_or(path.as_os_str());
match parent_fd.metadata_at(dir_name, false) {
Ok(safe_metadata) => {
// Create Stat from safe metadata
let file_info = safe_metadata.file_info();
let file_info_option = Some(FileInfo {
file_id: file_info.inode() as u128,
dev_id: file_info.device(),
});
let blocks = safe_metadata.blocks();
// For compatibility, still try to get std::fs::Metadata
// but fallback to a minimal approach if it fails
let std_metadata = fs::symlink_metadata(path).unwrap_or_else(|_| {
// If we can't get std metadata, create a minimal fake one
// This should rarely happen but provides a fallback
fs::symlink_metadata("/").expect("root should be accessible")
});
Stat {
path: path.to_path_buf(),
size: if safe_metadata.is_dir() {
0
} else {
safe_metadata.len()
},
blocks,
inodes: 1,
inode: file_info_option,
metadata: std_metadata,
}
}
Err(e) => {
let error = e.map_err_context(
|| translate!("du-error-cannot-access", "path" => path.quote()),
);
if let Err(send_error) = print_tx.send(Err(error)) {
return Err(Box::new(send_error));
}
return Err(Box::new(mpsc::SendError(Err(USimpleError::new(
0,
"Error already handled",
)))));
}
}
} else {
// This is the initial directory - try regular Stat::new first, then fallback to DirFd
match Stat::new(path, None, options) {
Ok(s) => s,
Err(_e) => {
// Try using our new DirFd method for the root directory
match DirFd::open(path) {
Ok(dir_fd) => match Stat::new_from_dirfd(&dir_fd, path, options) {
Ok(s) => s,
Err(e) => {
let error = e.map_err_context(
|| translate!("du-error-cannot-access", "path" => path.quote()),
);
if let Err(send_error) = print_tx.send(Err(error)) {
return Err(Box::new(send_error));
}
return Err(Box::new(mpsc::SendError(Err(USimpleError::new(
0,
"Error already handled",
)))));
}
},
Err(e) => {
let error = e.map_err_context(
|| translate!("du-error-cannot-access", "path" => path.quote()),
);
if let Err(send_error) = print_tx.send(Err(error)) {
return Err(Box::new(send_error));
}
return Err(Box::new(mpsc::SendError(Err(USimpleError::new(
0,
"Error already handled",
)))));
}
}
}
}
};
if !my_stat.metadata.is_dir() {
return Ok(my_stat);
}
// Open the directory using DirFd
let open_result = match parent_fd {
Some(parent) => parent.open_subdir(path.file_name().unwrap_or(path.as_os_str())),
None => DirFd::open(path),
};
let dir_fd = match open_result {
Ok(fd) => fd,
Err(e) => {
print_tx.send(Err(e.map_err_context(
|| translate!("du-error-cannot-read-directory", "path" => path.quote()),
)))?;
return Ok(my_stat);
}
};
// Read directory entries
let entries = match dir_fd.read_dir() {
Ok(entries) => entries,
Err(e) => {
print_tx.send(Err(e.map_err_context(
|| translate!("du-error-cannot-read-directory", "path" => path.quote()),
)))?;
return Ok(my_stat);
}
};
'file_loop: for entry_name in entries {
let entry_path = path.join(&entry_name);
// First get the lstat (without following symlinks) to check if it's a symlink
let lstat = match dir_fd.stat_at(&entry_name, false) {
Ok(stat) => stat,
Err(e) => {
print_tx.send(Err(e.map_err_context(
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
)))?;
continue;
}
};
// Check if it's a symlink
const S_IFMT: u32 = 0o170_000;
const S_IFDIR: u32 = 0o040_000;
const S_IFLNK: u32 = 0o120_000;
let is_symlink = (lstat.st_mode & S_IFMT) == S_IFLNK;
// Handle symlinks with -L option
// For safe traversal with -L, we skip symlinks to directories entirely
// and let the non-safe traversal handle them at the top level
let (entry_stat, is_dir) = if is_symlink && options.dereference == Deref::All {
// Skip symlinks to directories when using safe traversal with -L
// They will be handled by regular traversal
continue;
} else {
let is_dir = (lstat.st_mode & S_IFMT) == S_IFDIR;
(lstat, is_dir)
};
let file_info = if entry_stat.st_ino != 0 {
Some(FileInfo {
file_id: entry_stat.st_ino as u128,
dev_id: entry_stat.st_dev,
})
} else {
None
};
// For safe traversal, we need to handle stats differently
// We can't use std::fs::Metadata since that requires the full path
let this_stat = if is_dir {
// For directories, recurse using safe_du
Stat {
path: entry_path.clone(),
size: 0,
blocks: entry_stat.st_blocks as u64,
inodes: 1,
inode: file_info,
// We need a fake metadata - create one from symlink_metadata of parent
// This is a workaround since we can't get real metadata without the full path
metadata: my_stat.metadata.clone(),
}
} else {
// For files
Stat {
path: entry_path.clone(),
size: entry_stat.st_size as u64,
blocks: entry_stat.st_blocks as u64,
inodes: 1,
inode: file_info,
metadata: my_stat.metadata.clone(),
}
};
// Check excludes
for pattern in &options.excludes {
if pattern.matches(&this_stat.path.to_string_lossy())
|| pattern.matches(&entry_name.to_string_lossy())
{
if options.verbose {
println!(
"{}",
translate!("du-verbose-ignored", "path" => this_stat.path.quote())
);
}
continue 'file_loop;
}
}
// Handle inodes
if let Some(inode) = this_stat.inode {
if seen_inodes.contains(&inode) && (!options.count_links || !options.all) {
if options.count_links && !options.all {
my_stat.inodes += 1;
}
continue;
}
seen_inodes.insert(inode);
}
// Process directories recursively
if is_dir {
if options.one_file_system {
if let (Some(this_inode), Some(my_inode)) = (this_stat.inode, my_stat.inode) {
if this_inode.dev_id != my_inode.dev_id {
continue;
}
}
}
let this_stat = safe_du(
&entry_path,
options,
depth + 1,
seen_inodes,
print_tx,
Some(&dir_fd),
)?;
if !options.separate_dirs {
my_stat.size += this_stat.size;
my_stat.blocks += this_stat.blocks;
my_stat.inodes += this_stat.inodes;
}
print_tx.send(Ok(StatPrintInfo {
stat: this_stat,
depth: depth + 1,
}))?;
} else {
my_stat.size += this_stat.size;
my_stat.blocks += this_stat.blocks;
my_stat.inodes += 1;
if options.all {
print_tx.send(Ok(StatPrintInfo {
stat: this_stat,
depth: depth + 1,
}))?;
}
}
}
Ok(my_stat)
}
// this takes `my_stat` to avoid having to stat files multiple times.
// Only used on non-Linux platforms
// Regular traversal using std::fs
// Used on non-Linux platforms and as fallback for symlinks on Linux
#[allow(clippy::cognitive_complexity)]
fn du(
fn du_regular(
mut my_stat: Stat,
options: &TraversalOptions,
depth: usize,
seen_inodes: &mut HashSet<FileInfo>,
print_tx: &mpsc::Sender<UResult<StatPrintInfo>>,
ancestors: Option<&mut HashSet<FileInfo>>,
symlink_depth: Option<usize>,
) -> Result<Stat, Box<mpsc::SendError<UResult<StatPrintInfo>>>> {
let mut default_ancestors = HashSet::new();
let ancestors = ancestors.unwrap_or(&mut default_ancestors);
let symlink_depth = symlink_depth.unwrap_or(0);
// Maximum symlink depth to prevent infinite loops
const MAX_SYMLINK_DEPTH: usize = 40;
// Add current directory to ancestors if it's a directory
let my_inode = if my_stat.metadata.is_dir() {
my_stat.inode
} else {
None
};
if let Some(inode) = my_inode {
ancestors.insert(inode);
}
if my_stat.metadata.is_dir() {
let read = match fs::read_dir(&my_stat.path) {
Ok(read) => read,
@ -277,8 +604,46 @@ fn du(
'file_loop: for f in read {
match f {
Ok(entry) => {
match Stat::new(&entry.path(), Some(&entry), options) {
let entry_path = entry.path();
// Check if this is a symlink when using -L
let mut current_symlink_depth = symlink_depth;
let is_symlink = match entry.file_type() {
Ok(ft) => ft.is_symlink(),
Err(_) => false,
};
if is_symlink && options.dereference == Deref::All {
// Increment symlink depth
current_symlink_depth += 1;
// Check symlink depth limit
if current_symlink_depth > MAX_SYMLINK_DEPTH {
print_tx.send(Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
"Too many levels of symbolic links",
).map_err_context(
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
)))?;
continue 'file_loop;
}
}
match Stat::new(&entry_path, Some(&entry), options) {
Ok(this_stat) => {
// Check if symlink with -L points to an ancestor (cycle detection)
if is_symlink
&& options.dereference == Deref::All
&& this_stat.metadata.is_dir()
{
if let Some(inode) = this_stat.inode {
if ancestors.contains(&inode) {
// This symlink points to an ancestor directory - skip to avoid cycle
continue 'file_loop;
}
}
}
// We have an exclude list
for pattern in &options.excludes {
// Look at all patterns with both short and long paths
@ -326,8 +691,15 @@ fn du(
}
}
let this_stat =
du(this_stat, options, depth + 1, seen_inodes, print_tx)?;
let this_stat = du_regular(
this_stat,
options,
depth + 1,
seen_inodes,
print_tx,
Some(ancestors),
Some(current_symlink_depth),
)?;
if !options.separate_dirs {
my_stat.size += this_stat.size;
@ -350,9 +722,20 @@ fn du(
}
}
}
Err(e) => print_tx.send(Err(e.map_err_context(
|| translate!("du-error-cannot-access", "path" => entry.path().quote()),
)))?,
Err(e) => {
// Check if this is the "too many symlinks" error we want to catch
if e.kind() == std::io::ErrorKind::InvalidData
&& e.to_string().contains("Too many levels")
{
print_tx.send(Err(e.map_err_context(
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
)))?;
} else {
print_tx.send(Err(e.map_err_context(
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
)))?;
}
}
}
}
Err(error) => print_tx.send(Err(error.into()))?,
@ -360,6 +743,11 @@ fn du(
}
}
// Remove current directory from ancestors before returning
if let Some(inode) = my_inode {
ancestors.remove(&inode);
}
Ok(my_stat)
}
@ -727,25 +1115,80 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
}
// Check existence of path provided in argument
if let Ok(stat) = Stat::new(&path, None, &traversal_options) {
// Kick off the computation of disk usage from the initial path
let mut seen_inodes: HashSet<FileInfo> = HashSet::new();
if let Some(inode) = stat.inode {
seen_inodes.insert(inode);
let mut seen_inodes: HashSet<FileInfo> = HashSet::new();
// Determine which traversal method to use
#[cfg(target_os = "linux")]
let use_safe_traversal = traversal_options.dereference != Deref::All;
#[cfg(not(target_os = "linux"))]
let use_safe_traversal = false;
if use_safe_traversal {
// Use safe traversal (Linux only, when not using -L)
#[cfg(target_os = "linux")]
{
// Pre-populate seen_inodes with the starting directory to detect cycles
if let Ok(stat) = Stat::new(&path, None, &traversal_options) {
if let Some(inode) = stat.inode {
seen_inodes.insert(inode);
}
}
match safe_du(
&path,
&traversal_options,
0,
&mut seen_inodes,
&print_tx,
None,
) {
Ok(stat) => {
print_tx
.send(Ok(StatPrintInfo { stat, depth: 0 }))
.map_err(|e| USimpleError::new(1, e.to_string()))?;
}
Err(e) => {
// Check if this is our "already handled" error
if let mpsc::SendError(Err(simple_error)) = e.as_ref() {
if simple_error.code() == 0 {
// Error already handled, continue to next file
continue 'loop_file;
}
}
return Err(USimpleError::new(1, e.to_string()));
}
}
}
let stat = du(stat, &traversal_options, 0, &mut seen_inodes, &print_tx)
} else {
// Use regular traversal (non-Linux or when -L is used)
if let Ok(stat) = Stat::new(&path, None, &traversal_options) {
if let Some(inode) = stat.inode {
seen_inodes.insert(inode);
}
let stat = du_regular(
stat,
&traversal_options,
0,
&mut seen_inodes,
&print_tx,
None,
None,
)
.map_err(|e| USimpleError::new(1, e.to_string()))?;
print_tx
.send(Ok(StatPrintInfo { stat, depth: 0 }))
.map_err(|e| USimpleError::new(1, e.to_string()))?;
} else {
print_tx
.send(Err(USimpleError::new(
1,
translate!("du-error-cannot-access-no-such-file", "path" => path.to_string_lossy().quote()),
)))
.map_err(|e| USimpleError::new(1, e.to_string()))?;
print_tx
.send(Ok(StatPrintInfo { stat, depth: 0 }))
.map_err(|e| USimpleError::new(1, e.to_string()))?;
} else {
#[cfg(target_os = "linux")]
let error_msg = translate!("du-error-cannot-access", "path" => path.quote());
#[cfg(not(target_os = "linux"))]
let error_msg = translate!("du-error-cannot-access-no-such-file", "path" => path.to_string_lossy().quote());
print_tx
.send(Err(USimpleError::new(1, error_msg)))
.map_err(|e| USimpleError::new(1, e.to_string()))?;
}
}
}

View file

@ -1,7 +1,8 @@
// Safe directory traversal using openat() and related syscalls
// This module provides TOCTOU-safe filesystem operations for recursive traversal
// Only available on Linux
// spell-checker:ignore CLOEXEC RDONLY TOCTOU closedir dirp fdopendir fstatat openat
// spell-checker:ignore CLOEXEC RDONLY TOCTOU closedir dirp fdopendir fstatat openat REMOVEDIR unlinkat
// spell-checker:ignore RAII dirfd
#![cfg(target_os = "linux")]
@ -291,7 +292,6 @@ impl FileInfo {
ino: stat.st_ino as u64,
}
}
}
/// Create FileInfo from device and inode numbers
pub fn new(dev: u64, ino: u64) -> Self {
@ -384,6 +384,144 @@ impl Metadata {
pub fn as_raw_stat(&self) -> &libc::stat {
&self.stat
}
/// Reports whether this entry's file type is a directory.
///
/// One of the helpers named after their `std::fs::Metadata` counterparts.
pub fn is_dir(&self) -> bool {
    let kind = self.file_type();
    kind.is_directory()
}
/// Returns the length reported by `size()`; named `len` to mirror
/// `std::fs::Metadata::len`.
pub fn len(&self) -> u64 {
self.size()
}
/// Reports whether `len()` is zero.
pub fn is_empty(&self) -> bool {
    let byte_len = self.len();
    byte_len == 0
}
}
// `std::os::unix::fs::MetadataExt` implementation for compatibility with code
// written against `std::fs::Metadata`. Every accessor returns the corresponding
// field of the wrapped `libc::stat`.
//
// Where a field is returned without a cast, its libc type has to match the trait's
// return type exactly. The cfg-gated branches widen the field with `.into()` on
// targets where it is narrower and return it unchanged elsewhere.
//
// NOTE(review): this file is already gated by `#![cfg(target_os = "linux")]`, so
// the `not(windows)` gate below looks redundant - confirm before removing.
#[cfg(not(windows))]
impl std::os::unix::fs::MetadataExt for Metadata {
// Device ID (`st_dev`).
fn dev(&self) -> u64 {
self.stat.st_dev
}
// Inode number (`st_ino`), widened on 32-bit targets.
fn ino(&self) -> u64 {
#[cfg(target_pointer_width = "32")]
{
self.stat.st_ino.into()
}
#[cfg(not(target_pointer_width = "32"))]
{
self.stat.st_ino
}
}
// File type and permission bits (`st_mode`).
fn mode(&self) -> u32 {
self.stat.st_mode
}
// Hard-link count (`st_nlink`).
fn nlink(&self) -> u64 {
// st_nlink is u32 on most platforms except x86_64
#[cfg(target_arch = "x86_64")]
{
self.stat.st_nlink
}
#[cfg(not(target_arch = "x86_64"))]
{
self.stat.st_nlink.into()
}
}
// Owner user ID (`st_uid`).
fn uid(&self) -> u32 {
self.stat.st_uid
}
// Owner group ID (`st_gid`).
fn gid(&self) -> u32 {
self.stat.st_gid
}
// Device ID for special files (`st_rdev`).
fn rdev(&self) -> u64 {
self.stat.st_rdev
}
// Size in bytes (`st_size`), cast to unsigned with `as`.
fn size(&self) -> u64 {
self.stat.st_size as u64
}
// Last access time, seconds part (`st_atime`).
fn atime(&self) -> i64 {
#[cfg(target_pointer_width = "32")]
{
self.stat.st_atime.into()
}
#[cfg(not(target_pointer_width = "32"))]
{
self.stat.st_atime
}
}
// Last access time, nanoseconds part (`st_atime_nsec`).
fn atime_nsec(&self) -> i64 {
#[cfg(target_pointer_width = "32")]
{
self.stat.st_atime_nsec.into()
}
#[cfg(not(target_pointer_width = "32"))]
{
self.stat.st_atime_nsec
}
}
// Last modification time, seconds part (`st_mtime`).
fn mtime(&self) -> i64 {
#[cfg(target_pointer_width = "32")]
{
self.stat.st_mtime.into()
}
#[cfg(not(target_pointer_width = "32"))]
{
self.stat.st_mtime
}
}
// Last modification time, nanoseconds part (`st_mtime_nsec`).
fn mtime_nsec(&self) -> i64 {
#[cfg(target_pointer_width = "32")]
{
self.stat.st_mtime_nsec.into()
}
#[cfg(not(target_pointer_width = "32"))]
{
self.stat.st_mtime_nsec
}
}
// Last status change time, seconds part (`st_ctime`).
fn ctime(&self) -> i64 {
#[cfg(target_pointer_width = "32")]
{
self.stat.st_ctime.into()
}
#[cfg(not(target_pointer_width = "32"))]
{
self.stat.st_ctime
}
}
// Last status change time, nanoseconds part (`st_ctime_nsec`).
fn ctime_nsec(&self) -> i64 {
#[cfg(target_pointer_width = "32")]
{
self.stat.st_ctime_nsec.into()
}
#[cfg(not(target_pointer_width = "32"))]
{
self.stat.st_ctime_nsec
}
}
// Preferred I/O block size (`st_blksize`), cast to unsigned with `as`.
fn blksize(&self) -> u64 {
self.stat.st_blksize as u64
}
// Number of allocated blocks (`st_blocks`), cast to unsigned with `as`.
fn blocks(&self) -> u64 {
self.stat.st_blocks as u64
}
}
#[cfg(test)]
@ -647,3 +785,4 @@ mod tests {
);
}
}
}

View file

@ -4,6 +4,7 @@
// file that was distributed with this source code.
// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist fpath testdir testfile
// spell-checker:ignore selfref ELOOP
#[cfg(not(windows))]
use regex::Regex;
@ -1439,3 +1440,235 @@ fn test_du_threshold_no_suggested_values() {
let result = ts.ucmd().arg("--threshold").fails();
assert!(!result.stderr_str().contains("[possible values: ]"));
}
/// `du` must cope with a directory chain whose total path is about 1.5k
/// characters long (15 levels of ~100-character names).
#[test]
#[cfg(target_os = "linux")]
fn test_du_long_path_safe_traversal() {
    const ROOT: &str = "long_path_test";

    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // Build ROOT/aaa...0/aaa...1/.../aaa...14
    fixtures.mkdir(ROOT);
    let filler = "a".repeat(100);
    let mut nested = String::from(ROOT);
    for level in 0..15 {
        nested = format!("{nested}/{filler}{level}");
        fixtures.mkdir_all(&nested);
    }
    fixtures.write(&format!("{nested}/test.txt"), "test content");

    // Summary mode prints the root.
    let summary = scene.ucmd().arg("-s").arg(ROOT).succeeds();
    assert!(summary.stdout_str().contains(ROOT));

    // Full mode prints at least one line per nested directory.
    let listing = scene.ucmd().arg(ROOT).succeeds();
    let line_count = listing.stdout_str().trim().lines().count();
    assert!(line_count >= 15);
}
/// `du` descends through eleven nested `x` directories and, with `-a`, lists the
/// file at the bottom.
#[test]
#[cfg(unix)]
fn test_du_very_deep_directory() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // x/x/x/... - one root plus ten nested levels
    let mut nested = String::from("x");
    fixtures.mkdir(&nested);
    for _ in 0..10 {
        nested.push_str("/x");
        fixtures.mkdir_all(&nested);
    }
    fixtures.write(&format!("{nested}/file.txt"), "deep file");

    let summary = scene.ucmd().arg("-s").arg("x").succeeds();
    assert!(summary.stdout_str().contains('x'));

    let everything = scene.ucmd().arg("-a").arg("x").succeeds();
    assert!(everything.stdout_str().contains("file.txt"));
}
/// A top-level symlink to a deeply nested file produces output both with and
/// without `-L`.
#[test]
#[cfg(unix)]
fn test_du_safe_traversal_with_symlinks() {
    const LINK: &str = "shallow_link.txt";

    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // symlink_test/bbb...0/.../bbb...7/target.txt
    let filler = "b".repeat(50);
    let mut nested = String::from("symlink_test");
    fixtures.mkdir(&nested);
    for level in 0..8 {
        nested = format!("{nested}/{filler}{level}");
        fixtures.mkdir_all(&nested);
    }
    let target = format!("{nested}/target.txt");
    fixtures.write(&target, "target content");
    fixtures.symlink_file(&target, LINK);

    let dereferenced = scene.ucmd().arg("-L").arg(LINK).succeeds();
    assert!(!dereferenced.stdout_str().is_empty());

    let plain = scene.ucmd().arg(LINK).succeeds();
    assert!(!plain.stdout_str().is_empty());
}
/// `du` reports "Permission denied" for an entry inside a directory that lacks
/// the search (x) bit, and exits with failure.
#[test]
#[cfg(target_os = "linux")]
fn test_du_inaccessible_directory() {
    // tested by tests/du/no-x
    use std::os::unix::fs::PermissionsExt;

    /// Restores a usable mode on drop so the fixture directory can be removed,
    /// even when an assertion below panics.
    struct RestoreMode(std::path::PathBuf);
    impl Drop for RestoreMode {
        fn drop(&mut self) {
            let _ = std::fs::set_permissions(&self.0, std::fs::Permissions::from_mode(0o755));
        }
    }

    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;
    at.mkdir("d");
    at.mkdir("d/no-x");
    at.mkdir("d/no-x/y");

    // Declared after `ts`, so it is dropped (and the mode restored) before the
    // scenario is torn down.
    let _restore = RestoreMode(at.plus("d/no-x"));
    at.set_mode("d/no-x", 0o600);

    let result = ts.ucmd().arg("d").fails();
    result.stderr_contains("du: cannot access 'd/no-x/y': Permission denied");
}
/// With `-L`, a symlink pointing back at its own directory is not followed.
#[test]
#[cfg(unix)]
fn test_du_symlink_self_reference() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // selfref/self -> selfref
    fixtures.mkdir("selfref");
    fixtures.symlink_dir("selfref", "selfref/self");

    let outcome = scene.ucmd().arg("-L").arg("selfref").succeeds();

    // The directory is listed, the self-referencing link is not (that would
    // recurse forever).
    outcome.stdout_contains("selfref");
    outcome.stdout_does_not_contain("selfref/self");
}
/// A short chain of directory symlinks, each resolved through the previous one,
/// is traversed with `-L` without tripping a depth limit.
#[test]
#[cfg(unix)]
fn test_du_long_symlink_chain() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // This does not try to provoke ELOOP; it only checks that a reasonable chain works.
    fixtures.mkdir_all("deep/level1/level2/level3/level4/level5");
    fixtures.write(
        "deep/level1/level2/level3/level4/level5/file.txt",
        "content",
    );

    // (target, link) pairs; each link goes through the one created just before it.
    let chain = [
        ("deep/level1", "link1"),
        ("link1/level2", "link2"),
        ("link2/level3", "link3"),
    ];
    for (target, link) in chain {
        fixtures.symlink_dir(target, link);
    }

    let outcome = scene.ucmd().arg("-L").arg("link3").succeeds();
    outcome.stdout_contains("link3");
}
/// Without `-L`, a symlink that loops back to an ancestor directory is not
/// listed. This stands in for a bind-mount cycle, which cannot be created in a
/// plain test environment.
#[test]
#[cfg(all(unix, not(target_os = "macos")))]
fn test_du_bind_mount_simulation() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    fixtures.mkdir_all("mount_test/subdir");
    for (file, body) in [
        ("mount_test/file1.txt", "content1"),
        ("mount_test/subdir/file2.txt", "content2"),
    ] {
        fixtures.write(file, body);
    }

    // mount_test/subdir/cycle_link -> ../mount_test (a cycle back to the root)
    fixtures.symlink_dir("../mount_test", "mount_test/subdir/cycle_link");

    let outcome = scene.ucmd().arg("mount_test").succeeds();
    outcome.stdout_contains("mount_test/subdir");
    outcome.stdout_contains("mount_test");
    outcome.stdout_does_not_contain("mount_test/subdir/cycle_link");
}
/// With `-L`, a symlinked directory given on the command line is traversed and
/// its sub-directories are reported under the link's name.
#[test]
#[cfg(unix)]
fn test_du_symlink_depth_tracking() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    fixtures.mkdir_all("chain/dir1/dir2/dir3");
    fixtures.write("chain/dir1/dir2/dir3/file.txt", "content");

    // shortcut -> chain/dir1/dir2
    fixtures.symlink_dir("chain/dir1/dir2", "shortcut");

    let outcome = scene.ucmd().arg("-L").arg("shortcut").succeeds();
    outcome.stdout_contains("shortcut/dir3");
    outcome.stdout_contains("shortcut");
}
/// Scenario from GNU's long-from-unreadable.sh: run `du -s` on a very long
/// absolute path while the current directory is unreadable.
///
/// NOTE(review): the working directory is changed for the whole test process. Other
/// tests running in parallel see that change, and the test harness may set its own
/// working directory for the spawned command - confirm the child actually inherits
/// the unreadable cwd.
#[test]
#[cfg(target_os = "linux")]
fn test_du_long_path_from_unreadable() {
    use std::env;
    use std::fs;
    use std::os::unix::fs::PermissionsExt;
    use std::path::PathBuf;

    /// Puts the process back into its original working directory and makes the
    /// locked directory removable again. Runs on drop, so it also runs when an
    /// assertion below panics.
    struct RestoreOnDrop {
        cwd: PathBuf,
        locked_dir: PathBuf,
    }
    impl Drop for RestoreOnDrop {
        fn drop(&mut self) {
            let _ = env::set_current_dir(&self.cwd);
            let _ = fs::set_permissions(&self.locked_dir, fs::Permissions::from_mode(0o755));
        }
    }

    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    // Create a deep hierarchy similar to the GNU test, with a more reasonable depth
    // for unit tests: 20 levels of 200-character names.
    let dir_name = "x".repeat(200);
    let mut current_path = dir_name.clone();
    at.mkdir_all(&current_path);
    for _ in 1..20 {
        current_path = format!("{current_path}/{dir_name}");
        at.mkdir_all(&current_path);
    }
    at.write(&format!("{current_path}/test.txt"), "test content");

    at.mkdir("inaccessible");
    let inaccessible_path = at.plus("inaccessible");

    // Declared after `ts`, so the state is restored before the scenario is torn down.
    let _restore = RestoreOnDrop {
        cwd: env::current_dir().unwrap(),
        locked_dir: inaccessible_path.clone(),
    };

    // Enter the directory first, then strip all of its permissions.
    env::set_current_dir(&inaccessible_path).unwrap();
    fs::set_permissions(&inaccessible_path, fs::Permissions::from_mode(0o000)).unwrap();

    // Try to run du on the long path from the unreadable directory.
    let target_path = at.plus(&dir_name);
    let result = ts.ucmd().arg("-s").arg(&target_path).succeeds(); // Should succeed with safe traversal
    assert!(!result.stdout_str().is_empty());

    // Expect exactly "<number> <path>".
    let output = result.stdout_str().trim();
    let parts: Vec<&str> = output.split_whitespace().collect();
    assert_eq!(parts.len(), 2);
    assert!(parts[0].parse::<u64>().is_ok());
    assert!(parts[1].contains(&dir_name[..50])); // Check first part of the long name
}

View file

@ -4,6 +4,7 @@
# spell-checker:ignore (paths) abmon deref discrim eacces getlimits getopt ginstall inacc infloop inotify reflink ; (misc) INT_OFLOW OFLOW
# spell-checker:ignore baddecode submodules xstrtol distros ; (vars/env) SRCDIR vdir rcexp xpart dired OSTYPE ; (utils) gnproc greadlink gsed multihardlink texinfo CARGOFLAGS
# spell-checker:ignore openat TOCTOU
set -e