Fix paste to handle non-UTF-8 filenames

This commit is contained in:
Sylvestre Ledru 2025-08-08 15:28:11 +02:00
parent 581edf09d3
commit d45113f574
2 changed files with 33 additions and 12 deletions

View file

@ -5,9 +5,11 @@
use clap::{Arg, ArgAction, Command};
use std::cell::{OnceCell, RefCell};
use std::ffi::OsString;
use std::fs::File;
use std::io::{BufRead, BufReader, Stdin, Write, stdin, stdout};
use std::iter::Cycle;
use std::path::Path;
use std::rc::Rc;
use std::slice::Iter;
use uucore::LocalizedCommand;
@ -30,7 +32,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let serial = matches.get_flag(options::SERIAL);
let delimiters = matches.get_one::<String>(options::DELIMITER).unwrap();
let files = matches
.get_many::<String>(options::FILE)
.get_many::<OsString>(options::FILE)
.unwrap()
.cloned()
.collect();
@ -67,7 +69,8 @@ pub fn uu_app() -> Command {
.value_name("FILE")
.action(ArgAction::Append)
.default_value("-")
.value_hint(clap::ValueHint::FilePath),
.value_hint(clap::ValueHint::FilePath)
.value_parser(clap::value_parser!(OsString)),
)
.arg(
Arg::new(options::ZERO_TERMINATED)
@ -80,7 +83,7 @@ pub fn uu_app() -> Command {
#[allow(clippy::cognitive_complexity)]
fn paste(
filenames: Vec<String>,
filenames: Vec<OsString>,
serial: bool,
delimiters: &str,
line_ending: LineEnding,
@ -92,17 +95,16 @@ fn paste(
let mut input_source_vec = Vec::with_capacity(filenames.len());
for filename in filenames {
let input_source = match filename.as_str() {
"-" => InputSource::StandardInput(
let input_source = if filename == "-" {
InputSource::StandardInput(
stdin_once_cell
.get_or_init(|| Rc::new(RefCell::new(stdin())))
.clone(),
),
st => {
let file = File::open(st)?;
InputSource::File(BufReader::new(file))
}
)
} else {
let path = Path::new(&filename);
let file = File::open(path)?;
InputSource::File(BufReader::new(file))
};
input_source_vec.push(input_source);

View file

@ -4,7 +4,8 @@
// file that was distributed with this source code.
// spell-checker:ignore bsdutils toybox
#[cfg(target_os = "linux")]
use std::os::unix::ffi::OsStringExt;
use uutests::at_and_ucmd;
use uutests::new_ucmd;
@ -252,6 +253,7 @@ FIRST!SECOND@THIRD#FOURTH!ABCDEFG
}
#[test]
#[cfg(unix)]
fn test_non_utf8_input() {
// 0xC0 is not valid UTF-8
const INPUT: &[u8] = b"Non-UTF-8 test: \xC0\x00\xC0.\n";
@ -375,3 +377,20 @@ fn test_data() {
.stdout_is(example.out);
}
}
/// Verifies that `paste` accepts file operands whose names are not valid UTF-8
/// and merges their lines column-wise with the default tab delimiter.
#[test]
#[cfg(target_os = "linux")]
fn test_paste_non_utf8_paths() {
    let (at, mut ucmd) = at_and_ucmd!();

    // Neither byte sequence is valid UTF-8: 0xFF never occurs in UTF-8, and
    // 0xF0 0x90 is a truncated multi-byte sequence.
    let left_name = std::ffi::OsString::from_vec(vec![0xFF, 0xFE]);
    let right_name = std::ffi::OsString::from_vec(vec![0xF0, 0x90]);

    // Create both input files inside the test fixture directory, in operand order.
    let fixtures = [
        (&left_name, &b"line1\nline2\n"[..]),
        (&right_name, &b"col1\ncol2\n"[..]),
    ];
    for (name, contents) in fixtures {
        std::fs::write(at.plus(name), contents).unwrap();
    }

    // Each output row is one line from the first file, a tab, then the
    // corresponding line from the second file.
    let expected = "line1\tcol1\nline2\tcol2\n";
    ucmd.arg(&left_name)
        .arg(&right_name)
        .succeeds()
        .stdout_is(expected);
}