Fix cut to handle non-UTF-8 filenames

This commit is contained in:
Sylvestre Ledru 2025-08-08 15:06:54 +02:00
parent 693bdd7748
commit 6e55a2a3bb
2 changed files with 39 additions and 8 deletions

View file

@ -343,11 +343,11 @@ fn cut_fields<R: Read, W: Write>(
}
}
fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
fn cut_files(mut filenames: Vec<OsString>, mode: &Mode) {
let mut stdin_read = false;
if filenames.is_empty() {
filenames.push("-".to_owned());
filenames.push(OsString::from("-"));
}
let mut out: Box<dyn Write> = if stdout().is_terminal() {
@ -370,12 +370,12 @@ fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
stdin_read = true;
} else {
let path = Path::new(&filename[..]);
let path = Path::new(filename);
if path.is_dir() {
show_error!(
"{}: {}",
filename.maybe_quote(),
filename.to_string_lossy().maybe_quote(),
translate!("cut-error-is-directory")
);
set_exit_code(1);
@ -384,7 +384,7 @@ fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
show_if_err!(
File::open(path)
.map_err_context(|| filename.maybe_quote().to_string())
.map_err_context(|| filename.to_string_lossy().to_string())
.and_then(|file| {
match &mode {
Mode::Bytes(ranges, opts) | Mode::Characters(ranges, opts) => {
@ -577,8 +577,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
},
};
let files: Vec<String> = matches
.get_many::<String>(options::FILE)
let files: Vec<OsString> = matches
.get_many::<OsString>(options::FILE)
.unwrap_or_default()
.cloned()
.collect();
@ -681,6 +681,7 @@ pub fn uu_app() -> Command {
Arg::new(options::FILE)
.hide(true)
.action(ArgAction::Append)
.value_hint(clap::ValueHint::FilePath),
.value_hint(clap::ValueHint::FilePath)
.value_parser(clap::value_parser!(OsString)),
)
}

View file

@ -385,3 +385,33 @@ fn test_failed_write_is_reported() {
.fails()
.stderr_is("cut: write error: No space left on device\n");
}
/// Checks that `cut` accepts an input file whose name is not valid UTF-8.
///
/// The fixture is first written under an ordinary name and then renamed to a
/// byte sequence that cannot be decoded as UTF-8; `cut -f1,3` is then run
/// against the renamed file and must print the first and third columns.
#[test]
#[cfg(target_os = "linux")]
fn test_cut_non_utf8_paths() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;
    use uutests::util::TestScenario;
    use uutests::util_name;

    let scenario = TestScenario::new(util_name!());
    let fixtures = &scenario.fixtures;

    // 0xFF and 0xFE never occur in well-formed UTF-8, so this name cannot be
    // represented as a `str`/`String`.
    let raw_name = OsStr::from_bytes(b"test_\xFF\xFE.txt");

    // Stage the tab-separated content under a plain name, then move it to the
    // non-UTF-8 name.
    fixtures.write("temp.txt", "a\tb\tc\n1\t2\t3\n");
    let staged = fixtures.subdir.join("temp.txt");
    let target = fixtures.subdir.join(raw_name);
    std::fs::rename(staged, target).unwrap();

    // `cut` must open the file by its raw name and select fields 1 and 3.
    scenario
        .ucmd()
        .arg("-f1,3")
        .arg(raw_name)
        .succeeds()
        .stdout_only("a\tc\n1\t3\n");
}