cat: fix handling of non-UTF-8 filenames

This commit is contained in:
Sylvestre Ledru 2025-08-08 10:51:28 +02:00
parent 57fff7e032
commit a3ebaaa730
2 changed files with 33 additions and 7 deletions

View file

@ -10,6 +10,7 @@ mod platform;
use crate::platform::is_unsafe_overwrite;
use clap::{Arg, ArgAction, Command};
use memchr::memchr2;
use std::ffi::OsString;
use std::fs::{File, metadata};
use std::io::{self, BufWriter, ErrorKind, IsTerminal, Read, Write};
/// Unix domain socket support
@ -267,9 +268,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.any(|v| matches.get_flag(v));
let squeeze_blank = matches.get_flag(options::SQUEEZE_BLANK);
let files: Vec<String> = match matches.get_many::<String>(options::FILE) {
let files: Vec<OsString> = match matches.get_many::<OsString>(options::FILE) {
Some(v) => v.cloned().collect(),
None => vec!["-".to_owned()],
None => vec![OsString::from("-")],
};
let options = OutputOptions {
@ -294,6 +295,7 @@ pub fn uu_app() -> Command {
Arg::new(options::FILE)
.hide(true)
.action(ArgAction::Append)
.value_parser(clap::value_parser!(OsString))
.value_hint(clap::ValueHint::FilePath),
)
.arg(
@ -379,7 +381,7 @@ fn cat_handle<R: FdReadable>(
}
}
fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> {
fn cat_path(path: &OsString, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> {
match get_input_type(path)? {
InputType::StdIn => {
let stdin = io::stdin();
@ -417,7 +419,7 @@ fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> Cat
}
}
fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
fn cat_files(files: &[OsString], options: &OutputOptions) -> UResult<()> {
let mut state = OutputState {
line_number: LineNumber::new(),
at_line_start: true,
@ -452,8 +454,8 @@ fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
/// # Arguments
///
/// * `path` - Path on a file system to classify metadata
fn get_input_type(path: &str) -> CatResult<InputType> {
if path == "-" {
fn get_input_type(path: &OsString) -> CatResult<InputType> {
if path.to_str() == Some("-") {
return Ok(InputType::StdIn);
}

View file

@ -14,9 +14,9 @@ use std::process::Stdio;
use uutests::at_and_ucmd;
use uutests::new_ucmd;
use uutests::util::TestScenario;
use uutests::util_name;
#[cfg(not(windows))]
use uutests::util::vec_of_size;
use uutests::util_name;
#[test]
fn test_output_simple() {
@ -747,6 +747,30 @@ fn test_write_fast_read_error() {
ucmd.arg("foo").fails().stderr_contains("Permission denied");
}
#[test]
#[cfg(target_os = "linux")]
fn test_cat_non_utf8_paths() {
    // `OsStrExt` (Unix-only) lets us build an `OsStr` directly from raw bytes.
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // Single source of truth for what is written to, and expected back from, the file.
    const CONTENT: &str = "Hello, non-UTF-8 world!\n";

    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;

    // The bytes 0xFF and 0xFE never occur in valid UTF-8, so this file name
    // cannot be represented as a `str`.
    let file_name = OsStr::from_bytes(b"test_\xFF\xFE.txt");

    // Create the file inside the fixture directory.
    std::fs::write(at.plus(file_name), CONTENT).unwrap();

    // `cat` must accept the non-UTF-8 path and exit successfully...
    let cmd_result = scene.ucmd().arg(file_name).succeeds();

    // ...and echo the file's content unchanged.
    let printed = cmd_result.stdout_str_lossy();
    assert_eq!(printed, CONTENT);
}
#[test]
#[cfg(target_os = "linux")]
fn test_appending_same_input_output() {