Fix join to handle non-UTF-8 filenames

This commit is contained in:
Sylvestre Ledru 2025-08-08 14:06:04 +02:00
parent 6c996865c9
commit 1056ebe0d5
2 changed files with 53 additions and 7 deletions

View file

@ -413,7 +413,7 @@ impl Line {
struct State<'a> {
key: usize,
file_name: &'a str,
file_name: &'a OsString,
file_num: FileNum,
print_unpaired: bool,
lines: Split<Box<dyn BufRead + 'a>>,
@ -427,7 +427,7 @@ struct State<'a> {
impl<'a> State<'a> {
fn new(
file_num: FileNum,
name: &'a str,
name: &'a OsString,
stdin: &'a Stdin,
key: usize,
line_ending: LineEnding,
@ -436,7 +436,8 @@ impl<'a> State<'a> {
let file_buf = if name == "-" {
Box::new(stdin.lock()) as Box<dyn BufRead>
} else {
let file = File::open(name).map_err_context(|| format!("{}", name.maybe_quote()))?;
let file = File::open(name)
.map_err_context(|| format!("{}", name.to_string_lossy().maybe_quote()))?;
Box::new(BufReader::new(file)) as Box<dyn BufRead>
};
@ -639,7 +640,7 @@ impl<'a> State<'a> {
&& (input.check_order == CheckOrder::Enabled
|| (self.has_unpaired && !self.has_failed))
{
let err_msg = translate!("join-error-not-sorted", "file" => self.file_name.maybe_quote(), "line_num" => self.line_num, "content" => String::from_utf8_lossy(&line.string));
let err_msg = translate!("join-error-not-sorted", "file" => self.file_name.to_string_lossy().maybe_quote(), "line_num" => self.line_num, "content" => String::from_utf8_lossy(&line.string));
// This is fatal if the check is enabled.
if input.check_order == CheckOrder::Enabled {
return Err(JoinError::UnorderedInput(err_msg));
@ -826,8 +827,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let settings = parse_settings(&matches)?;
let file1 = matches.get_one::<String>("file1").unwrap();
let file2 = matches.get_one::<String>("file2").unwrap();
let file1 = matches.get_one::<OsString>("file1").unwrap();
let file2 = matches.get_one::<OsString>("file2").unwrap();
if file1 == "-" && file2 == "-" {
return Err(USimpleError::new(
@ -951,6 +952,7 @@ pub fn uu_app() -> Command {
.required(true)
.value_name("FILE1")
.value_hint(clap::ValueHint::FilePath)
.value_parser(clap::value_parser!(OsString))
.hide(true),
)
.arg(
@ -958,11 +960,17 @@ pub fn uu_app() -> Command {
.required(true)
.value_name("FILE2")
.value_hint(clap::ValueHint::FilePath)
.value_parser(clap::value_parser!(OsString))
.hide(true),
)
}
fn exec<Sep: Separator>(file1: &str, file2: &str, settings: Settings, sep: Sep) -> UResult<()> {
fn exec<Sep: Separator>(
file1: &OsString,
file2: &OsString,
settings: Settings,
sep: Sep,
) -> UResult<()> {
let stdin = stdin();
let mut state1 = State::new(

View file

@ -533,3 +533,41 @@ fn test_full() {
.fails()
.stderr_contains("No space left on device");
}
/// Checks that `join` accepts file operands whose names are not valid UTF-8.
///
/// Two one-line inputs sharing the key `a` are stored under non-UTF-8 names and
/// passed to `join` as raw `OsStr` arguments; the joined line must be the only
/// output.
#[test]
#[cfg(target_os = "linux")]
fn test_join_non_utf8_paths() {
    use std::ffi::OsStr;
    use std::fs;
    use std::os::unix::ffi::OsStrExt;

    let ts = TestScenario::new(util_name!());
    let test_dir = ts.fixtures.subdir.as_path();

    // The bytes 0xFF and 0xFE never occur in well-formed UTF-8, so these names
    // cannot be represented as `str`/`String`.
    let file1_name = OsStr::from_bytes(b"test_\xFF\xFE_1.txt");
    let file2_name = OsStr::from_bytes(b"test_\xFF\xFE_2.txt");

    // Create the inputs straight through `std::fs`, which accepts arbitrary
    // `OsStr` paths, instead of writing UTF-8-named files and renaming them.
    fs::write(test_dir.join(file1_name), "a 1\n").unwrap();
    fs::write(test_dir.join(file2_name), "a 2\n").unwrap();

    // Both lines share the join field `a`, so exactly one merged line is expected.
    ts.ucmd()
        .arg(file1_name)
        .arg(file2_name)
        .succeeds()
        .stdout_only("a 1 2\n");
}