From becbc0e19d14a4ed8274ee37a6cdb6dad3bdb829 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 8 Aug 2025 10:32:32 +0200 Subject: [PATCH] head: fix handling of non-UTF-8 filenames --- src/uu/head/src/head.rs | 50 ++++++++++++++++++++++++++++---------- tests/by-util/test_head.rs | 34 ++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 818062ba9..7c543b157 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -49,6 +49,7 @@ enum HeadError { ParseError(String), #[error("{}", translate!("head-error-bad-encoding"))] + #[allow(dead_code)] BadEncoding, #[error("{}", translate!("head-error-num-too-large"))] @@ -129,6 +130,7 @@ pub fn uu_app() -> Command { .arg( Arg::new(options::FILES_NAME) .action(ArgAction::Append) + .value_parser(clap::value_parser!(OsString)) .value_hint(clap::ValueHint::FilePath), ) } @@ -186,7 +188,9 @@ fn arg_iterate<'a>( None => Ok(Box::new(vec![first, second].into_iter().chain(args))), } } else { - Err(HeadError::BadEncoding) + // The second argument contains non-UTF-8 sequences, so it can't be an obsolete option + // like "-5". Treat it as a regular file argument. + Ok(Box::new(vec![first, second].into_iter().chain(args))) } } else { Ok(Box::new(vec![first].into_iter())) @@ -200,7 +204,7 @@ struct HeadOptions { pub line_ending: LineEnding, pub presume_input_pipe: bool, pub mode: Mode, - pub files: Vec, + pub files: Vec, } impl HeadOptions { @@ -215,9 +219,9 @@ impl HeadOptions { options.mode = Mode::from(matches)?; - options.files = match matches.get_many::(options::FILES_NAME) { + options.files = match matches.get_many::(options::FILES_NAME) { Some(v) => v.cloned().collect(), - None => vec!["-".to_owned()], + None => vec![OsString::from("-")], }; Ok(options) @@ -463,8 +467,8 @@ fn head_file(input: &mut File, options: &HeadOptions) -> io::Result { fn uu_head(options: &HeadOptions) -> UResult<()> { let mut first = true; for file in &options.files { - let res = match file.as_str() { - "-" => { + let res = match file.to_str() { + Some("-") => { if (options.files.len() > 1 && !options.quiet) || options.verbose { if !first { println!(); @@ -508,12 +512,12 @@ fn uu_head(options: &HeadOptions) -> UResult<()> { Ok(()) } - name => { - let mut file = match File::open(name) { + Some(name) => { + let mut file_handle = match File::open(file) { Ok(f) => f, Err(err) => { show!(err.map_err_context( - || translate!("head-error-cannot-open", "name" => name.quote()) + || translate!("head-error-cannot-open", "name" => file.to_string_lossy().quote()) )); continue; } @@ -524,15 +528,35 @@ fn uu_head(options: &HeadOptions) -> UResult<()> { } println!("==> {name} <=="); } - head_file(&mut file, options)?; + head_file(&mut file_handle, options)?; + Ok(()) + } + None => { + // Handle files with non-UTF-8 names + let mut file_handle = match File::open(file) { + Ok(f) => f, + Err(err) => { + show!(err.map_err_context( + || translate!("head-error-cannot-open", "name" => file.to_string_lossy().quote()) + )); + continue; + } + }; + if (options.files.len() > 1 && !options.quiet) || options.verbose { + if !first { + println!(); + } + println!("==> {} <==", file.to_string_lossy()); + } + head_file(&mut file_handle, options)?; Ok(()) } }; if let Err(e) = res { - let name = if file.as_str() == "-" { - "standard input" + let name = if file == "-" { + "standard input".to_string() } else { - file + file.to_string_lossy().into_owned() }; return Err(HeadError::Io { name: name.to_string(), diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 30f8378b9..2cdabdf3d 100644 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -858,3 +858,37 @@ fn test_write_to_dev_full() { } } } + +#[test] +#[cfg(target_os = "linux")] +fn test_head_non_utf8_paths() { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + // Create a test file with non-UTF-8 bytes in the name + let non_utf8_bytes = b"test_\xFF\xFE.txt"; + let non_utf8_name = OsStr::from_bytes(non_utf8_bytes); + + // Create the actual file with some content + std::fs::write(at.plus(non_utf8_name), "line1\nline2\nline3\n").unwrap(); + + // Test that head handles non-UTF-8 file names without crashing + let result = scene.ucmd().arg(non_utf8_name).succeeds(); + + // The result should contain the file content + let output = result.stdout_str_lossy(); + assert!(output.contains("line1")); + assert!(output.contains("line2")); + assert!(output.contains("line3")); + + // Test with line count argument + scene.ucmd() + .args(&["-n", "2"]) + .arg(non_utf8_name) + .succeeds() + .stdout_contains("line1") + .stdout_contains("line2"); +}