head: fix handling of non-UTF-8 filenames

This commit is contained in:
Sylvestre Ledru 2025-08-08 10:32:32 +02:00
parent b301131a67
commit becbc0e19d
2 changed files with 71 additions and 13 deletions

View file

@ -49,6 +49,7 @@ enum HeadError {
ParseError(String),
#[error("{}", translate!("head-error-bad-encoding"))]
#[allow(dead_code)]
BadEncoding,
#[error("{}", translate!("head-error-num-too-large"))]
@ -129,6 +130,7 @@ pub fn uu_app() -> Command {
.arg(
Arg::new(options::FILES_NAME)
.action(ArgAction::Append)
.value_parser(clap::value_parser!(OsString))
.value_hint(clap::ValueHint::FilePath),
)
}
@ -186,7 +188,9 @@ fn arg_iterate<'a>(
None => Ok(Box::new(vec![first, second].into_iter().chain(args))),
}
} else {
Err(HeadError::BadEncoding)
// The second argument contains non-UTF-8 sequences, so it can't be an obsolete option
// like "-5". Treat it as a regular file argument.
Ok(Box::new(vec![first, second].into_iter().chain(args)))
}
} else {
Ok(Box::new(vec![first].into_iter()))
@ -200,7 +204,7 @@ struct HeadOptions {
pub line_ending: LineEnding,
pub presume_input_pipe: bool,
pub mode: Mode,
pub files: Vec<String>,
pub files: Vec<OsString>,
}
impl HeadOptions {
@ -215,9 +219,9 @@ impl HeadOptions {
options.mode = Mode::from(matches)?;
options.files = match matches.get_many::<String>(options::FILES_NAME) {
options.files = match matches.get_many::<OsString>(options::FILES_NAME) {
Some(v) => v.cloned().collect(),
None => vec!["-".to_owned()],
None => vec![OsString::from("-")],
};
Ok(options)
@ -463,8 +467,8 @@ fn head_file(input: &mut File, options: &HeadOptions) -> io::Result<u64> {
fn uu_head(options: &HeadOptions) -> UResult<()> {
let mut first = true;
for file in &options.files {
let res = match file.as_str() {
"-" => {
let res = match file.to_str() {
Some("-") => {
if (options.files.len() > 1 && !options.quiet) || options.verbose {
if !first {
println!();
@ -508,12 +512,12 @@ fn uu_head(options: &HeadOptions) -> UResult<()> {
Ok(())
}
name => {
let mut file = match File::open(name) {
Some(name) => {
let mut file_handle = match File::open(file) {
Ok(f) => f,
Err(err) => {
show!(err.map_err_context(
|| translate!("head-error-cannot-open", "name" => name.quote())
|| translate!("head-error-cannot-open", "name" => file.to_string_lossy().quote())
));
continue;
}
@ -524,15 +528,35 @@ fn uu_head(options: &HeadOptions) -> UResult<()> {
}
println!("==> {name} <==");
}
head_file(&mut file, options)?;
head_file(&mut file_handle, options)?;
Ok(())
}
None => {
// Handle files with non-UTF-8 names
let mut file_handle = match File::open(file) {
Ok(f) => f,
Err(err) => {
show!(err.map_err_context(
|| translate!("head-error-cannot-open", "name" => file.to_string_lossy().quote())
));
continue;
}
};
if (options.files.len() > 1 && !options.quiet) || options.verbose {
if !first {
println!();
}
println!("==> {} <==", file.to_string_lossy());
}
head_file(&mut file_handle, options)?;
Ok(())
}
};
if let Err(e) = res {
let name = if file.as_str() == "-" {
"standard input"
let name = if file == "-" {
"standard input".to_string()
} else {
file
file.to_string_lossy().into_owned()
};
return Err(HeadError::Io {
name: name.to_string(),

View file

@ -858,3 +858,37 @@ fn test_write_to_dev_full() {
}
}
}
/// Checks that `head` can read a file whose name is not valid UTF-8.
///
/// The output is compared for exact equality rather than with substring
/// checks, so a regression that ignores `-n` or emits extra text is caught.
#[test]
#[cfg(target_os = "linux")]
fn test_head_non_utf8_paths() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    const CONTENT: &str = "line1\nline2\nline3\n";

    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;

    // The bytes 0xFF and 0xFE never occur in well-formed UTF-8, so this name
    // cannot be converted to a `str`.
    let non_utf8_name = OsStr::from_bytes(b"test_\xFF\xFE.txt");
    std::fs::write(at.plus(non_utf8_name), CONTENT).unwrap();

    // Default invocation: the three-line file is shorter than the default
    // line limit, so the whole content must come back unchanged.
    let result = scene.ucmd().arg(non_utf8_name).succeeds();
    let output = result.stdout_str_lossy();
    assert_eq!(output, CONTENT);

    // Explicit line count: exactly the first two lines, and nothing else.
    let result = scene
        .ucmd()
        .args(&["-n", "2"])
        .arg(non_utf8_name)
        .succeeds();
    let output = result.stdout_str_lossy();
    assert_eq!(output, "line1\nline2\n");
}