cat: Fix reporting "input file is output file" error when outputting to an input file (#8025)

* cat: Check if a file can be overwritten safely in Unix

* cat: Check if a file can be overwritten safely in Windows

* cat: Test writing read-write file that is input and output

* cat: Unit test `is_appending` function

* cat: Unit test `is_unsafe_overwrite` function

* cat: Comment why a few function calls could return Err

* cat: Remove obvious comments from test
This commit is contained in:
Teemu Pätsi 2025-06-06 13:01:31 +03:00 committed by GitHub
parent 9e21259e2d
commit 4d40671d79
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 253 additions and 43 deletions

3
Cargo.lock generated
View file

@ -2613,8 +2613,11 @@ dependencies = [
"clap",
"memchr",
"nix",
"tempfile",
"thiserror 2.0.12",
"uucore",
"winapi-util",
"windows-sys 0.59.0",
]
[[package]]

View file

@ -26,6 +26,13 @@ uucore = { workspace = true, features = ["fast-inc", "fs", "pipes"] }
[target.'cfg(unix)'.dependencies]
nix = { workspace = true }
[target.'cfg(windows)'.dependencies]
winapi-util = { workspace = true }
windows-sys = { workspace = true, features = ["Win32_Storage_FileSystem"] }
[dev-dependencies]
tempfile = { workspace = true }
[[bin]]
name = "cat"
path = "src/main.rs"

View file

@ -4,6 +4,10 @@
// file that was distributed with this source code.
// spell-checker:ignore (ToDO) nonprint nonblank nonprinting ELOOP
mod platform;
use crate::platform::is_unsafe_overwrite;
use std::fs::{File, metadata};
use std::io::{self, BufWriter, IsTerminal, Read, Write};
/// Unix domain socket support
@ -18,12 +22,9 @@ use std::os::unix::net::UnixStream;
use clap::{Arg, ArgAction, Command};
use memchr::memchr2;
#[cfg(unix)]
use nix::fcntl::{FcntlArg, fcntl};
use thiserror::Error;
use uucore::display::Quotable;
use uucore::error::UResult;
use uucore::fs::FileInformation;
use uucore::locale::get_message;
use uucore::{fast_inc::fast_inc_one, format_usage};
@ -366,42 +367,17 @@ fn cat_handle<R: FdReadable>(
}
}
/// Whether this process is appending to stdout.
///
/// Queries the file status flags of stdout with `fcntl(F_GETFL)` and tests the
/// append bit. Returns `false` when the flags cannot be read.
#[cfg(unix)]
fn is_appending() -> bool {
let stdout = io::stdout();
let Ok(flags) = fcntl(stdout.as_fd(), FcntlArg::F_GETFL) else {
return false;
};
// TODO Replace `1 << 10` with `nix::fcntl::OFlag::O_APPEND`.
// NOTE(review): `1 << 10` is a hard-coded stand-in for `O_APPEND`; the numeric
// value of that flag may differ between Unix targets — confirm before relying on it.
let o_append = 1 << 10;
(flags & o_append) > 0
}
/// Whether this process is appending to stdout.
///
/// This variant is compiled on non-Unix targets and always answers `false`.
#[cfg(not(unix))]
fn is_appending() -> bool {
false
}
fn cat_path(
path: &str,
options: &OutputOptions,
state: &mut OutputState,
out_info: Option<&FileInformation>,
) -> CatResult<()> {
fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> {
match get_input_type(path)? {
InputType::StdIn => {
let stdin = io::stdin();
let in_info = FileInformation::from_file(&stdin)?;
if is_unsafe_overwrite(&stdin, &io::stdout()) {
return Err(CatError::OutputIsInput);
}
let mut handle = InputHandle {
reader: stdin,
is_interactive: io::stdin().is_terminal(),
};
if let Some(out_info) = out_info {
if in_info == *out_info && is_appending() {
return Err(CatError::OutputIsInput);
}
}
cat_handle(&mut handle, options, state)
}
InputType::Directory => Err(CatError::IsDirectory),
@ -417,15 +393,9 @@ fn cat_path(
}
_ => {
let file = File::open(path)?;
if let Some(out_info) = out_info {
if out_info.file_size() != 0
&& FileInformation::from_file(&file).ok().as_ref() == Some(out_info)
{
return Err(CatError::OutputIsInput);
}
if is_unsafe_overwrite(&file, &io::stdout()) {
return Err(CatError::OutputIsInput);
}
let mut handle = InputHandle {
reader: file,
is_interactive: false,
@ -436,8 +406,6 @@ fn cat_path(
}
fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
let out_info = FileInformation::from_file(&io::stdout()).ok();
let mut state = OutputState {
line_number: LineNumber::new(),
at_line_start: true,
@ -447,7 +415,7 @@ fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
let mut error_messages: Vec<String> = Vec::new();
for path in files {
if let Err(err) = cat_path(path, options, &mut state, out_info.as_ref()) {
if let Err(err) = cat_path(path, options, &mut state) {
error_messages.push(format!("{}: {err}", path.maybe_quote()));
}
}

View file

@ -0,0 +1,16 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// Re-export the `is_unsafe_overwrite` implementation matching the target family so
// callers can import it from `crate::platform` without any `cfg` of their own.
// NOTE(review): nothing is exported for targets that are neither unix nor windows —
// confirm that such targets are not expected to build this crate.
#[cfg(unix)]
pub use self::unix::is_unsafe_overwrite;
#[cfg(windows)]
pub use self::windows::is_unsafe_overwrite;
// Only the implementation module for the current target family is compiled.
#[cfg(unix)]
mod unix;
#[cfg(windows)]
mod windows;

View file

@ -0,0 +1,108 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore lseek seekable
use nix::fcntl::{FcntlArg, OFlag, fcntl};
use nix::unistd::{Whence, lseek};
use std::os::fd::AsFd;
use uucore::fs::FileInformation;
/// Reports whether copying `input` to `output` could make the program re-read
/// its own output.
///
/// The answer is `true` when both descriptors refer to the same nonempty file and
/// either `output` is opened in append mode or the offset of `input` lies before the
/// offset of `output`. In that situation bytes written through `output` land ahead
/// of the read position, get read again and written again, which can loop forever
/// and corrupt the file.
///
/// Returns `false` whenever the file information or a file offset cannot be obtained.
pub fn is_unsafe_overwrite<I: AsFd, O: AsFd>(input: &I, output: &O) -> bool {
    // Without file information for both ends there is nothing to compare.
    let source = match FileInformation::from_file(input) {
        Ok(info) => info,
        Err(_) => return false,
    };
    let target = match FileInformation::from_file(output) {
        Ok(info) => info,
        Err(_) => return false,
    };

    // Different files never clash, and an empty file has nothing to re-read.
    let same_nonempty_file = source == target && target.file_size() != 0;
    if !same_nonempty_file {
        return false;
    }

    // In append mode the unsafe verdict does not depend on the offsets.
    if is_appending(output) {
        return true;
    }

    // `lseek` fails for descriptors that cannot report an offset; treat that as safe.
    let read_offset = match lseek(input.as_fd(), 0, Whence::SeekCur) {
        Ok(offset) => offset,
        Err(_) => return false,
    };
    match lseek(output.as_fd(), 0, Whence::SeekCur) {
        Ok(write_offset) => read_offset < write_offset,
        Err(_) => false,
    }
}
/// Tells whether `file` was opened with the `O_APPEND` flag.
///
/// A descriptor whose status flags cannot be queried is reported as not appending.
fn is_appending<F: AsFd>(file: &F) -> bool {
    match fcntl(file.as_fd(), FcntlArg::F_GETFL) {
        Ok(raw_flags) => OFlag::from_bits_truncate(raw_flags).contains(OFlag::O_APPEND),
        Err(_) => false,
    }
}
// Unit tests for the Unix overwrite checks, run against temporary files.
#[cfg(test)]
mod tests {
use crate::platform::unix::{is_appending, is_unsafe_overwrite};
use std::fs::OpenOptions;
use std::io::{Seek, SeekFrom, Write};
use tempfile::NamedTempFile;
// Only a handle opened with `append(true)` must be reported as appending.
#[test]
fn test_is_appending() {
let temp_file = NamedTempFile::new().unwrap();
assert!(!is_appending(&temp_file));
let read_file = OpenOptions::new().read(true).open(&temp_file).unwrap();
assert!(!is_appending(&read_file));
let write_file = OpenOptions::new().write(true).open(&temp_file).unwrap();
assert!(!is_appending(&write_file));
let append_file = OpenOptions::new().append(true).open(&temp_file).unwrap();
assert!(is_appending(&append_file));
}
// The assertions below share file handles, so their order matters: later checks
// rely on the offsets left behind by the earlier writes and seeks.
#[test]
fn test_is_unsafe_overwrite() {
// Create two temp files one of which is empty
let empty = NamedTempFile::new().unwrap();
let mut nonempty = NamedTempFile::new().unwrap();
nonempty.write_all(b"anything").unwrap();
// Rewind so that `nonempty` sits at offset 0 for the comparisons below
nonempty.seek(SeekFrom::Start(0)).unwrap();
// Using a different file as input and output does not result in an overwrite
assert!(!is_unsafe_overwrite(&empty, &nonempty));
// Overwriting an empty file is always safe
assert!(!is_unsafe_overwrite(&empty, &empty));
// Overwriting a nonempty file through one and the same handle is safe:
// input and output report the same offset
assert!(!is_unsafe_overwrite(&nonempty, &nonempty));
// Overwriting an empty file opened in append mode is safe
let empty_append = OpenOptions::new().append(true).open(&empty).unwrap();
assert!(!is_unsafe_overwrite(&empty, &empty_append));
// Overwriting a nonempty file opened in append mode is unsafe
let nonempty_append = OpenOptions::new().append(true).open(&nonempty).unwrap();
assert!(is_unsafe_overwrite(&nonempty, &nonempty_append));
// Overwriting a file opened in write mode is safe
let mut nonempty_write = OpenOptions::new().write(true).open(&nonempty).unwrap();
assert!(!is_unsafe_overwrite(&nonempty, &nonempty_write));
// Overwriting a file when the input and output file descriptors are pointing to
// different offsets is safe if the input offset is further than the output offset,
// and unsafe when the input offset is behind the output offset
nonempty_write.seek(SeekFrom::Start(1)).unwrap();
assert!(!is_unsafe_overwrite(&nonempty_write, &nonempty));
assert!(is_unsafe_overwrite(&nonempty, &nonempty_write));
}
}

View file

@ -0,0 +1,56 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::ffi::OsString;
use std::os::windows::ffi::OsStringExt;
use std::path::PathBuf;
use uucore::fs::FileInformation;
use winapi_util::AsHandleRef;
use windows_sys::Win32::Storage::FileSystem::{
FILE_NAME_NORMALIZED, GetFinalPathNameByHandleW, VOLUME_NAME_NT,
};
/// Reports whether `input` and `output` are the same file and that file already
/// has content.
///
/// Handles that do not resolve to the same path are always safe. For matching
/// paths the result is `true` only when the size of `output` is greater than zero;
/// if the file information cannot be read the overwrite is treated as safe.
pub fn is_unsafe_overwrite<I: AsHandleRef, O: AsHandleRef>(input: &I, output: &O) -> bool {
    // The size is only looked up once both handles are known to share a path.
    is_same_file_by_path(input, output)
        && matches!(
            FileInformation::from_file(output),
            Ok(info) if info.file_size() > 0
        )
}
/// Get the file path for a file handle
///
/// Asks `GetFinalPathNameByHandleW` for the normalized path of `file` in NT volume
/// form. Returns `None` when the call fails or the path cannot be retrieved even
/// after growing the buffer to the size the API requested.
fn get_file_path_from_handle<F: AsHandleRef>(file: &F) -> Option<PathBuf> {
    let handle = file.as_raw();
    let mut path_buf = vec![0u16; 4096];
    // When the buffer is too small the API returns the required size instead of the
    // written length. Retry once with that size rather than slicing past the end of
    // the buffer, which would panic.
    for _ in 0..2 {
        let capacity = u32::try_from(path_buf.len()).ok()?;
        // SAFETY: `path_buf` is a live, writable allocation of exactly `capacity`
        // `u16` elements, so the call cannot write outside of it, and `handle` is
        // borrowed from `file`, which outlives the call.
        let len = unsafe {
            GetFinalPathNameByHandleW(
                handle,
                path_buf.as_mut_ptr(),
                capacity,
                FILE_NAME_NORMALIZED | VOLUME_NAME_NT,
            )
        };
        if len == 0 {
            return None;
        }
        let len = usize::try_from(len).ok()?;
        // On success `len` counts the `u16`s written, excluding the terminating NUL,
        // so it is strictly smaller than the buffer length.
        if len < path_buf.len() {
            let path = OsString::from_wide(&path_buf[..len]);
            return Some(PathBuf::from(path));
        }
        // Otherwise `len` is the buffer size the API asked for.
        path_buf.resize(len, 0);
    }
    None
}
/// Tells whether two file handles resolve to the same file path.
///
/// Both paths are looked up with `get_file_path_from_handle`; if either lookup
/// fails the handles are treated as different files.
fn is_same_file_by_path<A: AsHandleRef, B: AsHandleRef>(a: &A, b: &B) -> bool {
    let first = get_file_path_from_handle(a);
    let second = get_file_path_from_handle(b);
    matches!(first.zip(second), Some((lhs, rhs)) if lhs == rhs)
}

View file

@ -9,6 +9,7 @@ use rlimit::Resource;
#[cfg(unix)]
use std::fs::File;
use std::fs::OpenOptions;
use std::fs::read_to_string;
use std::process::Stdio;
use uutests::at_and_ucmd;
use uutests::new_ucmd;
@ -637,6 +638,57 @@ fn test_write_to_self() {
);
}
/// Mirrors the GNU test `cat fxy2 fy 1<>fxy2` from `tests/cat/cat-self.sh`:
/// `fxy2` is both the first input and, opened read/write, the output.
// TODO: make this work on windows
#[test]
#[cfg(unix)]
fn test_successful_write_to_read_write_self() {
    let (at, mut ucmd) = at_and_ucmd!();
    at.write("fy", "y");
    at.write("fxy2", "x");

    // Hand a read/write handle on `fxy2` to the command as its stdout
    let target_path = at.plus("fxy2");
    let stdout_handle = OpenOptions::new()
        .read(true)
        .write(true)
        .open(&target_path)
        .unwrap();
    ucmd.args(&["fxy2", "fy"])
        .set_stdout(stdout_handle)
        .succeeds();

    // `fxy2` must now hold its own content followed by that of `fy`
    assert_eq!(read_to_string(target_path).unwrap(), "xy");
}
/// Mirrors the GNU test `cat fx fx3 1<>fx3` from `tests/cat/cat-self.sh`:
/// `fx3` is the second input and, opened read/write, the output.
#[test]
fn test_failed_write_to_read_write_self() {
    let (at, mut ucmd) = at_and_ucmd!();
    at.write("fx", "g");
    at.write("fx3", "bold");

    // Hand a read/write handle on `fx3` to the command as its stdout
    let target_path = at.plus("fx3");
    let stdout_handle = OpenOptions::new()
        .read(true)
        .write(true)
        .open(&target_path)
        .unwrap();
    ucmd.args(&["fx", "fx3"])
        .set_stdout(stdout_handle)
        .fails_with_code(1)
        .stderr_only("cat: fx3: input file is output file\n");

    // Only `fx` was copied, replacing the first byte of `fx3`
    assert_eq!(read_to_string(target_path).unwrap(), "gold");
}
#[test]
#[cfg(unix)]
#[cfg(not(target_os = "openbsd"))]