mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
du: port to use the safe io functions
This commit is contained in:
parent
82d9371ed3
commit
c890a3479a
5 changed files with 848 additions and 26 deletions
|
|
@ -21,7 +21,13 @@ path = "src/du.rs"
|
|||
# For the --exclude & --exclude-from options
|
||||
glob = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
uucore = { workspace = true, features = ["format", "fsext", "parser", "time"] }
|
||||
uucore = { workspace = true, features = [
|
||||
"format",
|
||||
"fsext",
|
||||
"parser",
|
||||
"time",
|
||||
"safe-traversal",
|
||||
] }
|
||||
thiserror = { workspace = true }
|
||||
fluent = { workspace = true }
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
// spell-checker:ignore fstatat openat dirfd
|
||||
|
||||
use clap::{Arg, ArgAction, ArgMatches, Command, builder::PossibleValue};
|
||||
use glob::Pattern;
|
||||
|
|
@ -25,6 +26,8 @@ use uucore::display::{Quotable, print_verbatim};
|
|||
use uucore::error::{FromIo, UError, UResult, USimpleError, set_exit_code};
|
||||
use uucore::fsext::{MetadataTimeField, metadata_get_time};
|
||||
use uucore::line_ending::LineEnding;
|
||||
#[cfg(target_os = "linux")]
|
||||
use uucore::safe_traversal::DirFd;
|
||||
use uucore::translate;
|
||||
|
||||
use uucore::parser::parse_glob;
|
||||
|
|
@ -160,6 +163,44 @@ impl Stat {
|
|||
metadata,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a Stat using safe traversal methods with `DirFd` for the root directory
|
||||
#[cfg(target_os = "linux")]
|
||||
fn new_from_dirfd(
|
||||
dir_fd: &DirFd,
|
||||
full_path: &Path,
|
||||
_options: &TraversalOptions,
|
||||
) -> std::io::Result<Self> {
|
||||
// Get metadata for the directory itself using fstat
|
||||
let safe_metadata = dir_fd.metadata()?;
|
||||
|
||||
// Create file info from the safe metadata
|
||||
let file_info = safe_metadata.file_info();
|
||||
let file_info_option = Some(FileInfo {
|
||||
file_id: file_info.inode() as u128,
|
||||
dev_id: file_info.device(),
|
||||
});
|
||||
|
||||
let blocks = safe_metadata.blocks();
|
||||
|
||||
// Create a temporary std::fs::Metadata by reading the same path
|
||||
// This is still needed for compatibility but should work since we're dealing with
|
||||
// the root path which should be accessible
|
||||
let std_metadata = fs::symlink_metadata(full_path)?;
|
||||
|
||||
Ok(Self {
|
||||
path: full_path.to_path_buf(),
|
||||
size: if safe_metadata.is_dir() {
|
||||
0
|
||||
} else {
|
||||
safe_metadata.len()
|
||||
},
|
||||
blocks,
|
||||
inodes: 1,
|
||||
inode: file_info_option,
|
||||
metadata: std_metadata,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
|
|
@ -254,15 +295,301 @@ fn read_block_size(s: Option<&str>) -> UResult<u64> {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
// For now, implement safe_du only on Linux
|
||||
// This is done for Ubuntu but should be extended to other platforms that support openat
|
||||
fn safe_du(
|
||||
path: &Path,
|
||||
options: &TraversalOptions,
|
||||
depth: usize,
|
||||
seen_inodes: &mut HashSet<FileInfo>,
|
||||
print_tx: &mpsc::Sender<UResult<StatPrintInfo>>,
|
||||
parent_fd: Option<&DirFd>,
|
||||
) -> Result<Stat, Box<mpsc::SendError<UResult<StatPrintInfo>>>> {
|
||||
// Get initial stat for this path - use DirFd if available to avoid path length issues
|
||||
let mut my_stat = if let Some(parent_fd) = parent_fd {
|
||||
// We have a parent fd, this is a subdirectory - use openat
|
||||
let dir_name = path.file_name().unwrap_or(path.as_os_str());
|
||||
match parent_fd.metadata_at(dir_name, false) {
|
||||
Ok(safe_metadata) => {
|
||||
// Create Stat from safe metadata
|
||||
let file_info = safe_metadata.file_info();
|
||||
let file_info_option = Some(FileInfo {
|
||||
file_id: file_info.inode() as u128,
|
||||
dev_id: file_info.device(),
|
||||
});
|
||||
let blocks = safe_metadata.blocks();
|
||||
|
||||
// For compatibility, still try to get std::fs::Metadata
|
||||
// but fallback to a minimal approach if it fails
|
||||
let std_metadata = fs::symlink_metadata(path).unwrap_or_else(|_| {
|
||||
// If we can't get std metadata, create a minimal fake one
|
||||
// This should rarely happen but provides a fallback
|
||||
fs::symlink_metadata("/").expect("root should be accessible")
|
||||
});
|
||||
|
||||
Stat {
|
||||
path: path.to_path_buf(),
|
||||
size: if safe_metadata.is_dir() {
|
||||
0
|
||||
} else {
|
||||
safe_metadata.len()
|
||||
},
|
||||
blocks,
|
||||
inodes: 1,
|
||||
inode: file_info_option,
|
||||
metadata: std_metadata,
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
let error = e.map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => path.quote()),
|
||||
);
|
||||
if let Err(send_error) = print_tx.send(Err(error)) {
|
||||
return Err(Box::new(send_error));
|
||||
}
|
||||
return Err(Box::new(mpsc::SendError(Err(USimpleError::new(
|
||||
0,
|
||||
"Error already handled",
|
||||
)))));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// This is the initial directory - try regular Stat::new first, then fallback to DirFd
|
||||
match Stat::new(path, None, options) {
|
||||
Ok(s) => s,
|
||||
Err(_e) => {
|
||||
// Try using our new DirFd method for the root directory
|
||||
match DirFd::open(path) {
|
||||
Ok(dir_fd) => match Stat::new_from_dirfd(&dir_fd, path, options) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
let error = e.map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => path.quote()),
|
||||
);
|
||||
if let Err(send_error) = print_tx.send(Err(error)) {
|
||||
return Err(Box::new(send_error));
|
||||
}
|
||||
return Err(Box::new(mpsc::SendError(Err(USimpleError::new(
|
||||
0,
|
||||
"Error already handled",
|
||||
)))));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
let error = e.map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => path.quote()),
|
||||
);
|
||||
if let Err(send_error) = print_tx.send(Err(error)) {
|
||||
return Err(Box::new(send_error));
|
||||
}
|
||||
return Err(Box::new(mpsc::SendError(Err(USimpleError::new(
|
||||
0,
|
||||
"Error already handled",
|
||||
)))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
if !my_stat.metadata.is_dir() {
|
||||
return Ok(my_stat);
|
||||
}
|
||||
|
||||
// Open the directory using DirFd
|
||||
let open_result = match parent_fd {
|
||||
Some(parent) => parent.open_subdir(path.file_name().unwrap_or(path.as_os_str())),
|
||||
None => DirFd::open(path),
|
||||
};
|
||||
|
||||
let dir_fd = match open_result {
|
||||
Ok(fd) => fd,
|
||||
Err(e) => {
|
||||
print_tx.send(Err(e.map_err_context(
|
||||
|| translate!("du-error-cannot-read-directory", "path" => path.quote()),
|
||||
)))?;
|
||||
return Ok(my_stat);
|
||||
}
|
||||
};
|
||||
|
||||
// Read directory entries
|
||||
let entries = match dir_fd.read_dir() {
|
||||
Ok(entries) => entries,
|
||||
Err(e) => {
|
||||
print_tx.send(Err(e.map_err_context(
|
||||
|| translate!("du-error-cannot-read-directory", "path" => path.quote()),
|
||||
)))?;
|
||||
return Ok(my_stat);
|
||||
}
|
||||
};
|
||||
|
||||
'file_loop: for entry_name in entries {
|
||||
let entry_path = path.join(&entry_name);
|
||||
|
||||
// First get the lstat (without following symlinks) to check if it's a symlink
|
||||
let lstat = match dir_fd.stat_at(&entry_name, false) {
|
||||
Ok(stat) => stat,
|
||||
Err(e) => {
|
||||
print_tx.send(Err(e.map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
|
||||
)))?;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Check if it's a symlink
|
||||
const S_IFMT: u32 = 0o170_000;
|
||||
const S_IFDIR: u32 = 0o040_000;
|
||||
const S_IFLNK: u32 = 0o120_000;
|
||||
let is_symlink = (lstat.st_mode & S_IFMT) == S_IFLNK;
|
||||
|
||||
// Handle symlinks with -L option
|
||||
// For safe traversal with -L, we skip symlinks to directories entirely
|
||||
// and let the non-safe traversal handle them at the top level
|
||||
let (entry_stat, is_dir) = if is_symlink && options.dereference == Deref::All {
|
||||
// Skip symlinks to directories when using safe traversal with -L
|
||||
// They will be handled by regular traversal
|
||||
continue;
|
||||
} else {
|
||||
let is_dir = (lstat.st_mode & S_IFMT) == S_IFDIR;
|
||||
(lstat, is_dir)
|
||||
};
|
||||
|
||||
let file_info = if entry_stat.st_ino != 0 {
|
||||
Some(FileInfo {
|
||||
file_id: entry_stat.st_ino as u128,
|
||||
dev_id: entry_stat.st_dev,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// For safe traversal, we need to handle stats differently
|
||||
// We can't use std::fs::Metadata since that requires the full path
|
||||
let this_stat = if is_dir {
|
||||
// For directories, recurse using safe_du
|
||||
Stat {
|
||||
path: entry_path.clone(),
|
||||
size: 0,
|
||||
blocks: entry_stat.st_blocks as u64,
|
||||
inodes: 1,
|
||||
inode: file_info,
|
||||
// We need a fake metadata - create one from symlink_metadata of parent
|
||||
// This is a workaround since we can't get real metadata without the full path
|
||||
metadata: my_stat.metadata.clone(),
|
||||
}
|
||||
} else {
|
||||
// For files
|
||||
Stat {
|
||||
path: entry_path.clone(),
|
||||
size: entry_stat.st_size as u64,
|
||||
blocks: entry_stat.st_blocks as u64,
|
||||
inodes: 1,
|
||||
inode: file_info,
|
||||
metadata: my_stat.metadata.clone(),
|
||||
}
|
||||
};
|
||||
|
||||
// Check excludes
|
||||
for pattern in &options.excludes {
|
||||
if pattern.matches(&this_stat.path.to_string_lossy())
|
||||
|| pattern.matches(&entry_name.to_string_lossy())
|
||||
{
|
||||
if options.verbose {
|
||||
println!(
|
||||
"{}",
|
||||
translate!("du-verbose-ignored", "path" => this_stat.path.quote())
|
||||
);
|
||||
}
|
||||
continue 'file_loop;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle inodes
|
||||
if let Some(inode) = this_stat.inode {
|
||||
if seen_inodes.contains(&inode) && (!options.count_links || !options.all) {
|
||||
if options.count_links && !options.all {
|
||||
my_stat.inodes += 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
seen_inodes.insert(inode);
|
||||
}
|
||||
|
||||
// Process directories recursively
|
||||
if is_dir {
|
||||
if options.one_file_system {
|
||||
if let (Some(this_inode), Some(my_inode)) = (this_stat.inode, my_stat.inode) {
|
||||
if this_inode.dev_id != my_inode.dev_id {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let this_stat = safe_du(
|
||||
&entry_path,
|
||||
options,
|
||||
depth + 1,
|
||||
seen_inodes,
|
||||
print_tx,
|
||||
Some(&dir_fd),
|
||||
)?;
|
||||
|
||||
if !options.separate_dirs {
|
||||
my_stat.size += this_stat.size;
|
||||
my_stat.blocks += this_stat.blocks;
|
||||
my_stat.inodes += this_stat.inodes;
|
||||
}
|
||||
print_tx.send(Ok(StatPrintInfo {
|
||||
stat: this_stat,
|
||||
depth: depth + 1,
|
||||
}))?;
|
||||
} else {
|
||||
my_stat.size += this_stat.size;
|
||||
my_stat.blocks += this_stat.blocks;
|
||||
my_stat.inodes += 1;
|
||||
if options.all {
|
||||
print_tx.send(Ok(StatPrintInfo {
|
||||
stat: this_stat,
|
||||
depth: depth + 1,
|
||||
}))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(my_stat)
|
||||
}
|
||||
|
||||
// this takes `my_stat` to avoid having to stat files multiple times.
|
||||
// Only used on non-Linux platforms
|
||||
// Regular traversal using std::fs
|
||||
// Used on non-Linux platforms and as fallback for symlinks on Linux
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
fn du(
|
||||
fn du_regular(
|
||||
mut my_stat: Stat,
|
||||
options: &TraversalOptions,
|
||||
depth: usize,
|
||||
seen_inodes: &mut HashSet<FileInfo>,
|
||||
print_tx: &mpsc::Sender<UResult<StatPrintInfo>>,
|
||||
ancestors: Option<&mut HashSet<FileInfo>>,
|
||||
symlink_depth: Option<usize>,
|
||||
) -> Result<Stat, Box<mpsc::SendError<UResult<StatPrintInfo>>>> {
|
||||
let mut default_ancestors = HashSet::new();
|
||||
let ancestors = ancestors.unwrap_or(&mut default_ancestors);
|
||||
let symlink_depth = symlink_depth.unwrap_or(0);
|
||||
// Maximum symlink depth to prevent infinite loops
|
||||
const MAX_SYMLINK_DEPTH: usize = 40;
|
||||
|
||||
// Add current directory to ancestors if it's a directory
|
||||
let my_inode = if my_stat.metadata.is_dir() {
|
||||
my_stat.inode
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some(inode) = my_inode {
|
||||
ancestors.insert(inode);
|
||||
}
|
||||
if my_stat.metadata.is_dir() {
|
||||
let read = match fs::read_dir(&my_stat.path) {
|
||||
Ok(read) => read,
|
||||
|
|
@ -277,8 +604,46 @@ fn du(
|
|||
'file_loop: for f in read {
|
||||
match f {
|
||||
Ok(entry) => {
|
||||
match Stat::new(&entry.path(), Some(&entry), options) {
|
||||
let entry_path = entry.path();
|
||||
|
||||
// Check if this is a symlink when using -L
|
||||
let mut current_symlink_depth = symlink_depth;
|
||||
let is_symlink = match entry.file_type() {
|
||||
Ok(ft) => ft.is_symlink(),
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
if is_symlink && options.dereference == Deref::All {
|
||||
// Increment symlink depth
|
||||
current_symlink_depth += 1;
|
||||
|
||||
// Check symlink depth limit
|
||||
if current_symlink_depth > MAX_SYMLINK_DEPTH {
|
||||
print_tx.send(Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
"Too many levels of symbolic links",
|
||||
).map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
|
||||
)))?;
|
||||
continue 'file_loop;
|
||||
}
|
||||
}
|
||||
|
||||
match Stat::new(&entry_path, Some(&entry), options) {
|
||||
Ok(this_stat) => {
|
||||
// Check if symlink with -L points to an ancestor (cycle detection)
|
||||
if is_symlink
|
||||
&& options.dereference == Deref::All
|
||||
&& this_stat.metadata.is_dir()
|
||||
{
|
||||
if let Some(inode) = this_stat.inode {
|
||||
if ancestors.contains(&inode) {
|
||||
// This symlink points to an ancestor directory - skip to avoid cycle
|
||||
continue 'file_loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We have an exclude list
|
||||
for pattern in &options.excludes {
|
||||
// Look at all patterns with both short and long paths
|
||||
|
|
@ -326,8 +691,15 @@ fn du(
|
|||
}
|
||||
}
|
||||
|
||||
let this_stat =
|
||||
du(this_stat, options, depth + 1, seen_inodes, print_tx)?;
|
||||
let this_stat = du_regular(
|
||||
this_stat,
|
||||
options,
|
||||
depth + 1,
|
||||
seen_inodes,
|
||||
print_tx,
|
||||
Some(ancestors),
|
||||
Some(current_symlink_depth),
|
||||
)?;
|
||||
|
||||
if !options.separate_dirs {
|
||||
my_stat.size += this_stat.size;
|
||||
|
|
@ -350,9 +722,20 @@ fn du(
|
|||
}
|
||||
}
|
||||
}
|
||||
Err(e) => print_tx.send(Err(e.map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => entry.path().quote()),
|
||||
)))?,
|
||||
Err(e) => {
|
||||
// Check if this is the "too many symlinks" error we want to catch
|
||||
if e.kind() == std::io::ErrorKind::InvalidData
|
||||
&& e.to_string().contains("Too many levels")
|
||||
{
|
||||
print_tx.send(Err(e.map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
|
||||
)))?;
|
||||
} else {
|
||||
print_tx.send(Err(e.map_err_context(
|
||||
|| translate!("du-error-cannot-access", "path" => entry_path.quote()),
|
||||
)))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(error) => print_tx.send(Err(error.into()))?,
|
||||
|
|
@ -360,6 +743,11 @@ fn du(
|
|||
}
|
||||
}
|
||||
|
||||
// Remove current directory from ancestors before returning
|
||||
if let Some(inode) = my_inode {
|
||||
ancestors.remove(&inode);
|
||||
}
|
||||
|
||||
Ok(my_stat)
|
||||
}
|
||||
|
||||
|
|
@ -727,25 +1115,80 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
}
|
||||
|
||||
// Check existence of path provided in argument
|
||||
if let Ok(stat) = Stat::new(&path, None, &traversal_options) {
|
||||
// Kick off the computation of disk usage from the initial path
|
||||
let mut seen_inodes: HashSet<FileInfo> = HashSet::new();
|
||||
if let Some(inode) = stat.inode {
|
||||
seen_inodes.insert(inode);
|
||||
let mut seen_inodes: HashSet<FileInfo> = HashSet::new();
|
||||
|
||||
// Determine which traversal method to use
|
||||
#[cfg(target_os = "linux")]
|
||||
let use_safe_traversal = traversal_options.dereference != Deref::All;
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let use_safe_traversal = false;
|
||||
|
||||
if use_safe_traversal {
|
||||
// Use safe traversal (Linux only, when not using -L)
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
// Pre-populate seen_inodes with the starting directory to detect cycles
|
||||
if let Ok(stat) = Stat::new(&path, None, &traversal_options) {
|
||||
if let Some(inode) = stat.inode {
|
||||
seen_inodes.insert(inode);
|
||||
}
|
||||
}
|
||||
|
||||
match safe_du(
|
||||
&path,
|
||||
&traversal_options,
|
||||
0,
|
||||
&mut seen_inodes,
|
||||
&print_tx,
|
||||
None,
|
||||
) {
|
||||
Ok(stat) => {
|
||||
print_tx
|
||||
.send(Ok(StatPrintInfo { stat, depth: 0 }))
|
||||
.map_err(|e| USimpleError::new(1, e.to_string()))?;
|
||||
}
|
||||
Err(e) => {
|
||||
// Check if this is our "already handled" error
|
||||
if let mpsc::SendError(Err(simple_error)) = e.as_ref() {
|
||||
if simple_error.code() == 0 {
|
||||
// Error already handled, continue to next file
|
||||
continue 'loop_file;
|
||||
}
|
||||
}
|
||||
return Err(USimpleError::new(1, e.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
let stat = du(stat, &traversal_options, 0, &mut seen_inodes, &print_tx)
|
||||
} else {
|
||||
// Use regular traversal (non-Linux or when -L is used)
|
||||
if let Ok(stat) = Stat::new(&path, None, &traversal_options) {
|
||||
if let Some(inode) = stat.inode {
|
||||
seen_inodes.insert(inode);
|
||||
}
|
||||
let stat = du_regular(
|
||||
stat,
|
||||
&traversal_options,
|
||||
0,
|
||||
&mut seen_inodes,
|
||||
&print_tx,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.map_err(|e| USimpleError::new(1, e.to_string()))?;
|
||||
|
||||
print_tx
|
||||
.send(Ok(StatPrintInfo { stat, depth: 0 }))
|
||||
.map_err(|e| USimpleError::new(1, e.to_string()))?;
|
||||
} else {
|
||||
print_tx
|
||||
.send(Err(USimpleError::new(
|
||||
1,
|
||||
translate!("du-error-cannot-access-no-such-file", "path" => path.to_string_lossy().quote()),
|
||||
)))
|
||||
.map_err(|e| USimpleError::new(1, e.to_string()))?;
|
||||
print_tx
|
||||
.send(Ok(StatPrintInfo { stat, depth: 0 }))
|
||||
.map_err(|e| USimpleError::new(1, e.to_string()))?;
|
||||
} else {
|
||||
#[cfg(target_os = "linux")]
|
||||
let error_msg = translate!("du-error-cannot-access", "path" => path.quote());
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
let error_msg = translate!("du-error-cannot-access-no-such-file", "path" => path.to_string_lossy().quote());
|
||||
|
||||
print_tx
|
||||
.send(Err(USimpleError::new(1, error_msg)))
|
||||
.map_err(|e| USimpleError::new(1, e.to_string()))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
// Safe directory traversal using openat() and related syscalls
|
||||
// This module provides TOCTOU-safe filesystem operations for recursive traversal
|
||||
// Only available on Linux
|
||||
// spell-checker:ignore CLOEXEC RDONLY TOCTOU closedir dirp fdopendir fstatat openat
|
||||
// spell-checker:ignore CLOEXEC RDONLY TOCTOU closedir dirp fdopendir fstatat openat REMOVEDIR unlinkat
|
||||
// spell-checker:ignore RAII dirfd
|
||||
|
||||
#![cfg(target_os = "linux")]
|
||||
|
||||
|
|
@ -291,7 +292,6 @@ impl FileInfo {
|
|||
ino: stat.st_ino as u64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create FileInfo from device and inode numbers
|
||||
pub fn new(dev: u64, ino: u64) -> Self {
|
||||
|
|
@ -384,6 +384,144 @@ impl Metadata {
|
|||
pub fn as_raw_stat(&self) -> &libc::stat {
|
||||
&self.stat
|
||||
}
|
||||
|
||||
/// Compatibility methods to match std::fs::Metadata interface
|
||||
pub fn is_dir(&self) -> bool {
|
||||
self.file_type().is_directory()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u64 {
|
||||
self.size()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
}
|
||||
|
||||
// Add MetadataExt trait implementation for compatibility
|
||||
#[cfg(not(windows))]
|
||||
impl std::os::unix::fs::MetadataExt for Metadata {
|
||||
fn dev(&self) -> u64 {
|
||||
self.stat.st_dev
|
||||
}
|
||||
|
||||
fn ino(&self) -> u64 {
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
self.stat.st_ino.into()
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
self.stat.st_ino
|
||||
}
|
||||
}
|
||||
|
||||
fn mode(&self) -> u32 {
|
||||
self.stat.st_mode
|
||||
}
|
||||
|
||||
fn nlink(&self) -> u64 {
|
||||
// st_nlink is u32 on most platforms except x86_64
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
self.stat.st_nlink
|
||||
}
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
{
|
||||
self.stat.st_nlink.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn uid(&self) -> u32 {
|
||||
self.stat.st_uid
|
||||
}
|
||||
|
||||
fn gid(&self) -> u32 {
|
||||
self.stat.st_gid
|
||||
}
|
||||
|
||||
fn rdev(&self) -> u64 {
|
||||
self.stat.st_rdev
|
||||
}
|
||||
|
||||
fn size(&self) -> u64 {
|
||||
self.stat.st_size as u64
|
||||
}
|
||||
|
||||
fn atime(&self) -> i64 {
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
self.stat.st_atime.into()
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
self.stat.st_atime
|
||||
}
|
||||
}
|
||||
|
||||
fn atime_nsec(&self) -> i64 {
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
self.stat.st_atime_nsec.into()
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
self.stat.st_atime_nsec
|
||||
}
|
||||
}
|
||||
|
||||
fn mtime(&self) -> i64 {
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
self.stat.st_mtime.into()
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
self.stat.st_mtime
|
||||
}
|
||||
}
|
||||
|
||||
fn mtime_nsec(&self) -> i64 {
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
self.stat.st_mtime_nsec.into()
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
self.stat.st_mtime_nsec
|
||||
}
|
||||
}
|
||||
|
||||
fn ctime(&self) -> i64 {
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
self.stat.st_ctime.into()
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
self.stat.st_ctime
|
||||
}
|
||||
}
|
||||
|
||||
fn ctime_nsec(&self) -> i64 {
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
{
|
||||
self.stat.st_ctime_nsec.into()
|
||||
}
|
||||
#[cfg(not(target_pointer_width = "32"))]
|
||||
{
|
||||
self.stat.st_ctime_nsec
|
||||
}
|
||||
}
|
||||
|
||||
fn blksize(&self) -> u64 {
|
||||
self.stat.st_blksize as u64
|
||||
}
|
||||
|
||||
fn blocks(&self) -> u64 {
|
||||
self.stat.st_blocks as u64
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -647,3 +785,4 @@ mod tests {
|
|||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist fpath testdir testfile
|
||||
// spell-checker:ignore selfref ELOOP
|
||||
#[cfg(not(windows))]
|
||||
use regex::Regex;
|
||||
|
||||
|
|
@ -1439,3 +1440,235 @@ fn test_du_threshold_no_suggested_values() {
|
|||
let result = ts.ucmd().arg("--threshold").fails();
|
||||
assert!(!result.stderr_str().contains("[possible values: ]"));
|
||||
}
|
||||
|
||||
#[test]
#[cfg(target_os = "linux")]
fn test_du_long_path_safe_traversal() {
    // du must cope with a hierarchy of 15 nested directories whose names are each
    // about 100 characters long.
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    let mut nested = String::from("long_path_test");
    fixtures.mkdir(&nested);

    for level in 0..15 {
        let component = format!("{}{}", "a".repeat(100), level);
        nested = format!("{nested}/{component}");
        fixtures.mkdir_all(&nested);
    }

    fixtures.write(&format!("{nested}/test.txt"), "test content");

    // Summary mode names the root.
    let summary = scenario.ucmd().arg("-s").arg("long_path_test").succeeds();
    assert!(summary.stdout_str().contains("long_path_test"));

    // Full mode prints at least one line per nested directory.
    let listing = scenario.ucmd().arg("long_path_test").succeeds();
    let line_count = listing.stdout_str().trim().lines().count();
    assert!(line_count >= 15);
}
|
||||
#[test]
#[cfg(unix)]
fn test_du_very_deep_directory() {
    // du must traverse a chain of eleven nested directories all named "x" and, with
    // -a, list the file at the bottom.
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    let mut current_path = String::from("x");
    at.mkdir(&current_path);

    for _ in 0..10 {
        current_path = format!("{current_path}/x");
        at.mkdir_all(&current_path);
    }

    at.write(&format!("{current_path}/file.txt"), "deep file");

    let result = ts.ucmd().arg("-s").arg("x").succeeds();
    assert!(result.stdout_str().contains('x'));

    let result = ts.ucmd().arg("-a").arg("x").succeeds();
    let output = result.stdout_str();
    assert!(output.contains("file.txt"));
}
|
||||
#[test]
#[cfg(unix)]
fn test_du_safe_traversal_with_symlinks() {
    // A top-level symlink to a file buried under eight long directory names must be
    // reported both with and without -L.
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    let mut nested = String::from("symlink_test");
    fixtures.mkdir(&nested);

    for level in 0..8 {
        let component = format!("{}{}", "b".repeat(50), level);
        nested = format!("{nested}/{component}");
        fixtures.mkdir_all(&nested);
    }

    let target = format!("{nested}/target.txt");
    fixtures.write(&target, "target content");
    fixtures.symlink_file(&target, "shallow_link.txt");

    let dereferenced = scenario.ucmd().arg("-L").arg("shallow_link.txt").succeeds();
    assert!(!dereferenced.stdout_str().is_empty());

    let plain = scenario.ucmd().arg("shallow_link.txt").succeeds();
    assert!(!plain.stdout_str().is_empty());
}
|
||||
#[test]
#[cfg(target_os = "linux")]
fn test_du_inaccessible_directory() {
    // tested by tests/du/no-x
    let ts = TestScenario::new(util_name!());
    let at = ts.fixtures.clone();

    at.mkdir("d");
    at.mkdir("d/no-x");
    at.mkdir("d/no-x/y");

    // Drop the search (execute) bit so entries below `d/no-x` cannot be accessed.
    at.set_mode("d/no-x", 0o600);

    let result = ts.ucmd().arg("d").run();

    // Restore access before asserting so the fixture tree can always be removed,
    // even when an assertion below fails.
    at.set_mode("d/no-x", 0o755);

    result
        .failure()
        .stderr_contains("du: cannot access 'd/no-x/y': Permission denied");
}
|
||||
|
||||
#[test]
#[cfg(unix)]
fn test_du_symlink_self_reference() {
    // A directory containing a symlink that points back at that same directory.
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    fixtures.mkdir("selfref");
    fixtures.symlink_dir("selfref", "selfref/self");

    // With -L the directory is reported, but the self-referencing link must not be
    // listed (that would mean du recursed through it).
    scenario
        .ucmd()
        .arg("-L")
        .arg("selfref")
        .succeeds()
        .stdout_contains("selfref")
        .stdout_does_not_contain("selfref/self");
}
|
||||
|
||||
#[test]
#[cfg(unix)]
fn test_du_long_symlink_chain() {
    // Reasonable symlink chains must work. This does not try to provoke ELOOP; each
    // link is defined through the previous one.
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    let deepest = "deep/level1/level2/level3/level4/level5";
    fixtures.mkdir_all(deepest);
    fixtures.write(
        "deep/level1/level2/level3/level4/level5/file.txt",
        "content",
    );

    for (target, link) in [
        ("deep/level1", "link1"),
        ("link1/level2", "link2"),
        ("link2/level3", "link3"),
    ] {
        fixtures.symlink_dir(target, link);
    }

    scenario
        .ucmd()
        .arg("-L")
        .arg("link3")
        .succeeds()
        .stdout_contains("link3");
}
|
||||
|
||||
#[test]
#[cfg(all(unix, not(target_os = "macos")))]
fn test_du_bind_mount_simulation() {
    // Real bind mounts cannot be created here, so a symlink pointing back up the tree
    // stands in for "the same directory visible in two places".
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    fixtures.mkdir_all("mount_test/subdir");
    for (file, body) in [
        ("mount_test/file1.txt", "content1"),
        ("mount_test/subdir/file2.txt", "content2"),
    ] {
        fixtures.write(file, body);
    }

    // Link that would create a cycle if it were followed.
    fixtures.symlink_dir("../mount_test", "mount_test/subdir/cycle_link");

    // Both real directories are listed; the cycle link is not.
    scenario
        .ucmd()
        .arg("mount_test")
        .succeeds()
        .stdout_contains("mount_test/subdir")
        .stdout_contains("mount_test")
        .stdout_does_not_contain("mount_test/subdir/cycle_link");
}
|
||||
|
||||
#[test]
#[cfg(unix)]
fn test_du_symlink_depth_tracking() {
    // A single symlink into the middle of a directory chain must be followed with -L
    // without tripping any depth limit.
    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    fixtures.mkdir_all("chain/dir1/dir2/dir3");
    fixtures.write("chain/dir1/dir2/dir3/file.txt", "content");
    fixtures.symlink_dir("chain/dir1/dir2", "shortcut");

    scenario
        .ucmd()
        .arg("-L")
        .arg("shortcut")
        .succeeds()
        .stdout_contains("shortcut/dir3")
        .stdout_contains("shortcut");
}
|
||||
|
||||
#[test]
#[cfg(target_os = "linux")]
fn test_du_long_path_from_unreadable() {
    // Test the specific scenario from GNU's long-from-unreadable.sh test
    // This verifies that du can handle very long paths when the current directory is unreadable
    use std::env;
    use std::fs;
    use std::os::unix::fs::PermissionsExt;

    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    // Create a deep hierarchy similar to the GNU test
    // Use a more reasonable depth for unit tests
    let dir_name = "x".repeat(200);
    let mut current_path = String::new();

    for i in 0..20 {
        if i == 0 {
            current_path = dir_name.clone();
        } else {
            current_path = format!("{current_path}/{dir_name}");
        }
        at.mkdir_all(&current_path);
    }

    at.write(&format!("{current_path}/test.txt"), "test content");

    at.mkdir("inaccessible");

    let original_cwd = env::current_dir().unwrap();

    let inaccessible_path = at.plus("inaccessible");
    env::set_current_dir(&inaccessible_path).unwrap();

    // Remove all permissions from the directory
    let mut perms = fs::metadata(&inaccessible_path).unwrap().permissions();
    perms.set_mode(0o000);
    fs::set_permissions(&inaccessible_path, perms).unwrap();

    // Try to run du on the long path from the unreadable directory
    let target_path = at.plus(&dir_name);
    let result = ts.ucmd().arg("-s").arg(&target_path).run();

    // Undo the process-wide changes before asserting, so a failed assertion cannot
    // leave the working directory changed or the directory impossible to clean up.
    env::set_current_dir(&original_cwd).unwrap();
    let mut perms = fs::metadata(&inaccessible_path).unwrap().permissions();
    perms.set_mode(0o755);
    fs::set_permissions(&inaccessible_path, perms).unwrap();

    // Should succeed with safe traversal
    result.success();

    assert!(!result.stdout_str().is_empty());
    let output = result.stdout_str().trim();
    let parts: Vec<&str> = output.split_whitespace().collect();
    assert_eq!(parts.len(), 2);

    assert!(parts[0].parse::<u64>().is_ok());
    assert!(parts[1].contains(&dir_name[..50])); // Check first part of the long name
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
# spell-checker:ignore (paths) abmon deref discrim eacces getlimits getopt ginstall inacc infloop inotify reflink ; (misc) INT_OFLOW OFLOW
|
||||
# spell-checker:ignore baddecode submodules xstrtol distros ; (vars/env) SRCDIR vdir rcexp xpart dired OSTYPE ; (utils) gnproc greadlink gsed multihardlink texinfo CARGOFLAGS
|
||||
# spell-checker:ignore openat TOCTOU
|
||||
|
||||
set -e
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue