From a3ebaaa730b5d6a8845533e01cdda58ef2081190 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 8 Aug 2025 10:51:28 +0200 Subject: [PATCH] cat: fix handling of non-UTF-8 filenames --- src/uu/cat/src/cat.rs | 14 ++++++++------ tests/by-util/test_cat.rs | 26 +++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index f03c7c3b9..d19c39f56 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -10,6 +10,7 @@ mod platform; use crate::platform::is_unsafe_overwrite; use clap::{Arg, ArgAction, Command}; use memchr::memchr2; +use std::ffi::OsString; use std::fs::{File, metadata}; use std::io::{self, BufWriter, ErrorKind, IsTerminal, Read, Write}; /// Unix domain socket support @@ -267,9 +268,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .any(|v| matches.get_flag(v)); let squeeze_blank = matches.get_flag(options::SQUEEZE_BLANK); - let files: Vec<String> = match matches.get_many::<String>(options::FILE) { + let files: Vec<OsString> = match matches.get_many::<OsString>(options::FILE) { Some(v) => v.cloned().collect(), - None => vec!["-".to_owned()], + None => vec![OsString::from("-")], }; let options = OutputOptions { @@ -294,6 +295,7 @@ pub fn uu_app() -> Command { Arg::new(options::FILE) .hide(true) .action(ArgAction::Append) + .value_parser(clap::value_parser!(OsString)) .value_hint(clap::ValueHint::FilePath), ) .arg( @@ -379,7 +381,7 @@ fn cat_handle( } } -fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> { +fn cat_path(path: &OsString, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> { match get_input_type(path)? 
{ InputType::StdIn => { let stdin = io::stdin(); @@ -417,7 +419,7 @@ fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> Cat } } -fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> { +fn cat_files(files: &[OsString], options: &OutputOptions) -> UResult<()> { let mut state = OutputState { line_number: LineNumber::new(), at_line_start: true, @@ -452,8 +454,8 @@ fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> { /// # Arguments /// /// * `path` - Path on a file system to classify metadata -fn get_input_type(path: &str) -> CatResult<InputType> { - if path == "-" { +fn get_input_type(path: &OsString) -> CatResult<InputType> { + if path.to_str() == Some("-") { return Ok(InputType::StdIn); } diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index c3e25c6d0..647775a3a 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -14,9 +14,9 @@ use std::process::Stdio; use uutests::at_and_ucmd; use uutests::new_ucmd; use uutests::util::TestScenario; +use uutests::util_name; #[cfg(not(windows))] use uutests::util::vec_of_size; -use uutests::util_name; #[test] fn test_output_simple() { @@ -747,6 +747,30 @@ fn test_write_fast_read_error() { ucmd.arg("foo").fails().stderr_contains("Permission denied"); } +#[test] +#[cfg(target_os = "linux")] +fn test_cat_non_utf8_paths() { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + // Create a test file with non-UTF-8 bytes in the name + let non_utf8_bytes = b"test_\xFF\xFE.txt"; + let non_utf8_name = OsStr::from_bytes(non_utf8_bytes); + + // Create the actual file with some content + std::fs::write(at.plus(non_utf8_name), "Hello, non-UTF-8 world!\n").unwrap(); + + // Test that cat handles non-UTF-8 file names without crashing + let result = scene.ucmd().arg(non_utf8_name).succeeds(); + + // The result should contain the file content + let output = result.stdout_str_lossy(); + 
assert_eq!(output, "Hello, non-UTF-8 world!\n"); +} + #[test] #[cfg(target_os = "linux")] fn test_appending_same_input_output() {