mirror of
https://github.com/uutils/coreutils.git
synced 2025-07-07 21:45:01 +00:00
cat: Fix reporting "input file is output file" error when outputting to an input file (#8025)
* cat: Check if a file can be overwritten safely in Unix * cat: Check if a file can be overwritten safely in Windows * cat: Test writing read-write file that is input and output * cat: Unit test `is_appending` function * cat: Unit test `is_unsafe_overwrite` function * cat: Comment why a few function calls could return Err * cat: Remove obvious comments from test
This commit is contained in:
parent
9e21259e2d
commit
4d40671d79
7 changed files with 253 additions and 43 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -2613,8 +2613,11 @@ dependencies = [
|
|||
"clap",
|
||||
"memchr",
|
||||
"nix",
|
||||
"tempfile",
|
||||
"thiserror 2.0.12",
|
||||
"uucore",
|
||||
"winapi-util",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -26,6 +26,13 @@ uucore = { workspace = true, features = ["fast-inc", "fs", "pipes"] }
|
|||
[target.'cfg(unix)'.dependencies]
|
||||
nix = { workspace = true }
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi-util = { workspace = true }
|
||||
windows-sys = { workspace = true, features = ["Win32_Storage_FileSystem"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
|
||||
[[bin]]
|
||||
name = "cat"
|
||||
path = "src/main.rs"
|
||||
|
|
|
@ -4,6 +4,10 @@
|
|||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (ToDO) nonprint nonblank nonprinting ELOOP
|
||||
|
||||
mod platform;
|
||||
|
||||
use crate::platform::is_unsafe_overwrite;
|
||||
use std::fs::{File, metadata};
|
||||
use std::io::{self, BufWriter, IsTerminal, Read, Write};
|
||||
/// Unix domain socket support
|
||||
|
@ -18,12 +22,9 @@ use std::os::unix::net::UnixStream;
|
|||
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use memchr::memchr2;
|
||||
#[cfg(unix)]
|
||||
use nix::fcntl::{FcntlArg, fcntl};
|
||||
use thiserror::Error;
|
||||
use uucore::display::Quotable;
|
||||
use uucore::error::UResult;
|
||||
use uucore::fs::FileInformation;
|
||||
use uucore::locale::get_message;
|
||||
use uucore::{fast_inc::fast_inc_one, format_usage};
|
||||
|
||||
|
@ -366,42 +367,17 @@ fn cat_handle<R: FdReadable>(
|
|||
}
|
||||
}
|
||||
|
||||
/// Whether this process is appending to stdout.
|
||||
#[cfg(unix)]
|
||||
fn is_appending() -> bool {
|
||||
let stdout = io::stdout();
|
||||
let Ok(flags) = fcntl(stdout.as_fd(), FcntlArg::F_GETFL) else {
|
||||
return false;
|
||||
};
|
||||
// TODO Replace `1 << 10` with `nix::fcntl::Oflag::O_APPEND`.
|
||||
let o_append = 1 << 10;
|
||||
(flags & o_append) > 0
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn is_appending() -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn cat_path(
|
||||
path: &str,
|
||||
options: &OutputOptions,
|
||||
state: &mut OutputState,
|
||||
out_info: Option<&FileInformation>,
|
||||
) -> CatResult<()> {
|
||||
fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> {
|
||||
match get_input_type(path)? {
|
||||
InputType::StdIn => {
|
||||
let stdin = io::stdin();
|
||||
let in_info = FileInformation::from_file(&stdin)?;
|
||||
if is_unsafe_overwrite(&stdin, &io::stdout()) {
|
||||
return Err(CatError::OutputIsInput);
|
||||
}
|
||||
let mut handle = InputHandle {
|
||||
reader: stdin,
|
||||
is_interactive: io::stdin().is_terminal(),
|
||||
};
|
||||
if let Some(out_info) = out_info {
|
||||
if in_info == *out_info && is_appending() {
|
||||
return Err(CatError::OutputIsInput);
|
||||
}
|
||||
}
|
||||
cat_handle(&mut handle, options, state)
|
||||
}
|
||||
InputType::Directory => Err(CatError::IsDirectory),
|
||||
|
@ -417,15 +393,9 @@ fn cat_path(
|
|||
}
|
||||
_ => {
|
||||
let file = File::open(path)?;
|
||||
|
||||
if let Some(out_info) = out_info {
|
||||
if out_info.file_size() != 0
|
||||
&& FileInformation::from_file(&file).ok().as_ref() == Some(out_info)
|
||||
{
|
||||
return Err(CatError::OutputIsInput);
|
||||
}
|
||||
if is_unsafe_overwrite(&file, &io::stdout()) {
|
||||
return Err(CatError::OutputIsInput);
|
||||
}
|
||||
|
||||
let mut handle = InputHandle {
|
||||
reader: file,
|
||||
is_interactive: false,
|
||||
|
@ -436,8 +406,6 @@ fn cat_path(
|
|||
}
|
||||
|
||||
fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
|
||||
let out_info = FileInformation::from_file(&io::stdout()).ok();
|
||||
|
||||
let mut state = OutputState {
|
||||
line_number: LineNumber::new(),
|
||||
at_line_start: true,
|
||||
|
@ -447,7 +415,7 @@ fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
|
|||
let mut error_messages: Vec<String> = Vec::new();
|
||||
|
||||
for path in files {
|
||||
if let Err(err) = cat_path(path, options, &mut state, out_info.as_ref()) {
|
||||
if let Err(err) = cat_path(path, options, &mut state) {
|
||||
error_messages.push(format!("{}: {err}", path.maybe_quote()));
|
||||
}
|
||||
}
|
||||
|
|
16
src/uu/cat/src/platform/mod.rs
Normal file
16
src/uu/cat/src/platform/mod.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
#[cfg(unix)]
|
||||
pub use self::unix::is_unsafe_overwrite;
|
||||
|
||||
#[cfg(windows)]
|
||||
pub use self::windows::is_unsafe_overwrite;
|
||||
|
||||
#[cfg(unix)]
|
||||
mod unix;
|
||||
|
||||
#[cfg(windows)]
|
||||
mod windows;
|
108
src/uu/cat/src/platform/unix.rs
Normal file
108
src/uu/cat/src/platform/unix.rs
Normal file
|
@ -0,0 +1,108 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore lseek seekable
|
||||
|
||||
use nix::fcntl::{FcntlArg, OFlag, fcntl};
|
||||
use nix::unistd::{Whence, lseek};
|
||||
use std::os::fd::AsFd;
|
||||
use uucore::fs::FileInformation;
|
||||
|
||||
/// An unsafe overwrite occurs when the same nonempty file is used as both stdin and stdout,
|
||||
/// and the file offset of stdin is positioned earlier than that of stdout.
|
||||
/// In this scenario, bytes read from stdin are written to a later part of the file
|
||||
/// via stdout, which can then be read again by stdin and written again by stdout,
|
||||
/// causing an infinite loop and potential file corruption.
|
||||
pub fn is_unsafe_overwrite<I: AsFd, O: AsFd>(input: &I, output: &O) -> bool {
|
||||
// `FileInformation::from_file` returns an error if the file descriptor is closed, invalid,
|
||||
// or refers to a non-regular file (e.g., socket, pipe, or special device).
|
||||
let Ok(input_info) = FileInformation::from_file(input) else {
|
||||
return false;
|
||||
};
|
||||
let Ok(output_info) = FileInformation::from_file(output) else {
|
||||
return false;
|
||||
};
|
||||
if input_info != output_info || output_info.file_size() == 0 {
|
||||
return false;
|
||||
}
|
||||
if is_appending(output) {
|
||||
return true;
|
||||
}
|
||||
// `lseek` returns an error if the file descriptor is closed or it refers to
|
||||
// a non-seekable resource (e.g., pipe, socket, or some devices).
|
||||
let Ok(input_pos) = lseek(input.as_fd(), 0, Whence::SeekCur) else {
|
||||
return false;
|
||||
};
|
||||
let Ok(output_pos) = lseek(output.as_fd(), 0, Whence::SeekCur) else {
|
||||
return false;
|
||||
};
|
||||
input_pos < output_pos
|
||||
}
|
||||
|
||||
/// Whether the file is opened with the `O_APPEND` flag
|
||||
fn is_appending<F: AsFd>(file: &F) -> bool {
|
||||
let flags_raw = fcntl(file.as_fd(), FcntlArg::F_GETFL).unwrap_or_default();
|
||||
let flags = OFlag::from_bits_truncate(flags_raw);
|
||||
flags.contains(OFlag::O_APPEND)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::platform::unix::{is_appending, is_unsafe_overwrite};
    use std::fs::OpenOptions;
    use std::io::{Seek, SeekFrom, Write};
    use tempfile::NamedTempFile;

    /// Only a handle opened with `append(true)` is reported as appending.
    #[test]
    fn test_is_appending() {
        let scratch = NamedTempFile::new().unwrap();
        assert!(!is_appending(&scratch));

        let reader = OpenOptions::new().read(true).open(&scratch).unwrap();
        assert!(!is_appending(&reader));

        let writer = OpenOptions::new().write(true).open(&scratch).unwrap();
        assert!(!is_appending(&writer));

        let appender = OpenOptions::new().append(true).open(&scratch).unwrap();
        assert!(is_appending(&appender));
    }

    /// Walks through the same-file, emptiness, append-mode and offset cases in order.
    /// The order matters: later assertions depend on the offsets set up earlier.
    #[test]
    fn test_is_unsafe_overwrite() {
        // One empty file, and one with content whose handle is rewound to offset 0.
        let blank = NamedTempFile::new().unwrap();
        let mut filled = NamedTempFile::new().unwrap();
        filled.write_all(b"anything").unwrap();
        filled.rewind().unwrap();

        // Distinct files can never overwrite each other.
        assert!(!is_unsafe_overwrite(&blank, &filled));

        // An empty file has nothing that could be read back.
        assert!(!is_unsafe_overwrite(&blank, &blank));

        // The very same handle has one offset, so input is never behind output.
        assert!(!is_unsafe_overwrite(&filled, &filled));

        // Append mode on an empty file is still safe.
        let blank_appender = OpenOptions::new().append(true).open(&blank).unwrap();
        assert!(!is_unsafe_overwrite(&blank, &blank_appender));

        // Append mode on a nonempty file is unsafe.
        let filled_appender = OpenOptions::new().append(true).open(&filled).unwrap();
        assert!(is_unsafe_overwrite(&filled, &filled_appender));

        // A fresh write handle starts at offset 0, the same offset as the input.
        let mut filled_writer = OpenOptions::new().write(true).open(&filled).unwrap();
        assert!(!is_unsafe_overwrite(&filled, &filled_writer));

        // With distinct offsets, the pair is safe when the input is ahead of the output
        // and unsafe when the input is behind it.
        filled_writer.seek(SeekFrom::Start(1)).unwrap();
        assert!(!is_unsafe_overwrite(&filled_writer, &filled));
        assert!(is_unsafe_overwrite(&filled, &filled_writer));
    }
}
|
56
src/uu/cat/src/platform/windows.rs
Normal file
56
src/uu/cat/src/platform/windows.rs
Normal file
|
@ -0,0 +1,56 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use std::ffi::OsString;
|
||||
use std::os::windows::ffi::OsStringExt;
|
||||
use std::path::PathBuf;
|
||||
use uucore::fs::FileInformation;
|
||||
use winapi_util::AsHandleRef;
|
||||
use windows_sys::Win32::Storage::FileSystem::{
|
||||
FILE_NAME_NORMALIZED, GetFinalPathNameByHandleW, VOLUME_NAME_NT,
|
||||
};
|
||||
|
||||
/// An unsafe overwrite occurs when the same file is used as both stdin and stdout
|
||||
/// and the stdout file is not empty.
|
||||
pub fn is_unsafe_overwrite<I: AsHandleRef, O: AsHandleRef>(input: &I, output: &O) -> bool {
|
||||
if !is_same_file_by_path(input, output) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the output file is empty
|
||||
FileInformation::from_file(output)
|
||||
.map(|info| info.file_size() > 0)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Get the file path for a file handle
|
||||
fn get_file_path_from_handle<F: AsHandleRef>(file: &F) -> Option<PathBuf> {
|
||||
let handle = file.as_raw();
|
||||
let mut path_buf = vec![0u16; 4096];
|
||||
|
||||
// SAFETY: We should check how many bytes was written to `path_buf`
|
||||
// and only read that many bytes from it.
|
||||
let len = unsafe {
|
||||
GetFinalPathNameByHandleW(
|
||||
handle,
|
||||
path_buf.as_mut_ptr(),
|
||||
path_buf.len() as u32,
|
||||
FILE_NAME_NORMALIZED | VOLUME_NAME_NT,
|
||||
)
|
||||
};
|
||||
if len == 0 {
|
||||
return None;
|
||||
}
|
||||
let path = OsString::from_wide(&path_buf[..len as usize]);
|
||||
Some(PathBuf::from(path))
|
||||
}
|
||||
|
||||
/// Compare two file handles if they correspond to the same file
|
||||
fn is_same_file_by_path<A: AsHandleRef, B: AsHandleRef>(a: &A, b: &B) -> bool {
|
||||
match (get_file_path_from_handle(a), get_file_path_from_handle(b)) {
|
||||
(Some(path1), Some(path2)) => path1 == path2,
|
||||
_ => false,
|
||||
}
|
||||
}
|
|
@ -9,6 +9,7 @@ use rlimit::Resource;
|
|||
#[cfg(unix)]
|
||||
use std::fs::File;
|
||||
use std::fs::OpenOptions;
|
||||
use std::fs::read_to_string;
|
||||
use std::process::Stdio;
|
||||
use uutests::at_and_ucmd;
|
||||
use uutests::new_ucmd;
|
||||
|
@ -637,6 +638,57 @@ fn test_write_to_self() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Test derived from the following GNU test in `tests/cat/cat-self.sh`:
///
/// `cat fxy2 fy 1<>fxy2`
// TODO: make this work on windows
#[test]
#[cfg(unix)]
fn test_successful_write_to_read_write_self() {
    let (at, mut ucmd) = at_and_ucmd!();
    at.write("fy", "y");
    at.write("fxy2", "x");

    // `fxy2` is opened read/write and attached as stdout, while also being
    // named as the first input operand.
    let target_path = at.plus("fxy2");
    let read_write_stdout = OpenOptions::new()
        .read(true)
        .write(true)
        .open(&target_path)
        .unwrap();

    ucmd.args(&["fxy2", "fy"])
        .set_stdout(read_write_stdout)
        .succeeds();

    // `fxy2` ends up holding its own content followed by the content of `fy`
    let merged = read_to_string(&target_path).unwrap();
    assert_eq!(merged, "xy");
}
|
||||
|
||||
/// Test derived from the following GNU test in `tests/cat/cat-self.sh`:
///
/// `cat fx fx3 1<>fx3`
#[test]
fn test_failed_write_to_read_write_self() {
    let (at, mut ucmd) = at_and_ucmd!();
    at.write("fx", "g");
    at.write("fx3", "bold");

    // `fx3` is opened read/write and attached as stdout, while also being
    // named as the second input operand.
    let target_path = at.plus("fx3");
    let read_write_stdout = OpenOptions::new()
        .read(true)
        .write(true)
        .open(&target_path)
        .unwrap();

    ucmd.args(&["fx", "fx3"])
        .set_stdout(read_write_stdout)
        .fails_with_code(1)
        .stderr_only("cat: fx3: input file is output file\n");

    // `fx` was copied over the start of `fx3` before the `fx3` operand was rejected
    let overwritten = read_to_string(&target_path).unwrap();
    assert_eq!(overwritten, "gold");
}
|
||||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
#[cfg(not(target_os = "openbsd"))]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue