// Patch summary: in src/uu/join/src/join.rs the FILE1/FILE2 operands are parsed with clap's `value_parser!(OsString)` and passed as `&OsString` (previously `&str`) through `exec` and `State::new`; messages render the name via `to_string_lossy().maybe_quote()`.
// tests/by-util/test_join.rs gains `test_join_non_utf8_paths` (Linux-only), which joins two files whose names contain the bytes 0xFF 0xFE and expects the output "a 1 2\n".
diff --git a/src/uu/join/src/join.rs b/src/uu/join/src/join.rs index dd1323506..4dc1dd146 100644 --- a/src/uu/join/src/join.rs +++ b/src/uu/join/src/join.rs @@ -413,7 +413,7 @@ impl Line { struct State<'a> { key: usize, - file_name: &'a str, + file_name: &'a OsString, file_num: FileNum, print_unpaired: bool, lines: Split>, @@ -427,7 +427,7 @@ struct State<'a> { impl<'a> State<'a> { fn new( file_num: FileNum, - name: &'a str, + name: &'a OsString, stdin: &'a Stdin, key: usize, line_ending: LineEnding, @@ -436,7 +436,8 @@ impl<'a> State<'a> { let file_buf = if name == "-" { Box::new(stdin.lock()) as Box } else { - let file = File::open(name).map_err_context(|| format!("{}", name.maybe_quote()))?; + let file = File::open(name) + .map_err_context(|| format!("{}", name.to_string_lossy().maybe_quote()))?; Box::new(BufReader::new(file)) as Box }; @@ -639,7 +640,7 @@ impl<'a> State<'a> { && (input.check_order == CheckOrder::Enabled || (self.has_unpaired && !self.has_failed)) { - let err_msg = translate!("join-error-not-sorted", "file" => self.file_name.maybe_quote(), "line_num" => self.line_num, "content" => String::from_utf8_lossy(&line.string)); + let err_msg = translate!("join-error-not-sorted", "file" => self.file_name.to_string_lossy().maybe_quote(), "line_num" => self.line_num, "content" => String::from_utf8_lossy(&line.string)); // This is fatal if the check is enabled. 
if input.check_order == CheckOrder::Enabled { return Err(JoinError::UnorderedInput(err_msg)); @@ -826,8 +827,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let settings = parse_settings(&matches)?; - let file1 = matches.get_one::("file1").unwrap(); - let file2 = matches.get_one::("file2").unwrap(); + let file1 = matches.get_one::("file1").unwrap(); + let file2 = matches.get_one::("file2").unwrap(); if file1 == "-" && file2 == "-" { return Err(USimpleError::new( @@ -951,6 +952,7 @@ pub fn uu_app() -> Command { .required(true) .value_name("FILE1") .value_hint(clap::ValueHint::FilePath) + .value_parser(clap::value_parser!(OsString)) .hide(true), ) .arg( @@ -958,11 +960,17 @@ pub fn uu_app() -> Command { .required(true) .value_name("FILE2") .value_hint(clap::ValueHint::FilePath) + .value_parser(clap::value_parser!(OsString)) .hide(true), ) } -fn exec(file1: &str, file2: &str, settings: Settings, sep: Sep) -> UResult<()> { +fn exec( + file1: &OsString, + file2: &OsString, + settings: Settings, + sep: Sep, +) -> UResult<()> { let stdin = stdin(); let mut state1 = State::new( diff --git a/tests/by-util/test_join.rs b/tests/by-util/test_join.rs index e9924eea9..65b927717 100644 --- a/tests/by-util/test_join.rs +++ b/tests/by-util/test_join.rs @@ -533,3 +533,41 @@ fn test_full() { .fails() .stderr_contains("No space left on device"); } + +#[test] +#[cfg(target_os = "linux")] +fn test_join_non_utf8_paths() { + use std::fs; + + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + // Create the non-UTF-8 names by writing UTF-8-named files and renaming them via std::fs below, + // since the test framework reportedly doesn't support OsStr for file names (TODO confirm) + let test_dir = at.subdir.as_path(); + + // Create temporary files with valid names first + at.write("temp1.txt", "a 1\n"); + at.write("temp2.txt", "a 2\n"); + + // Rename them to non-UTF-8 names using std::fs + let file1_bytes = b"test_\xFF\xFE_1.txt"; + let file2_bytes = b"test_\xFF\xFE_2.txt"; + + #[cfg(unix)] + { + use 
std::os::unix::ffi::OsStrExt; + let file1_name = std::ffi::OsStr::from_bytes(file1_bytes); + let file2_name = std::ffi::OsStr::from_bytes(file2_bytes); + + fs::rename(test_dir.join("temp1.txt"), test_dir.join(file1_name)).unwrap(); + fs::rename(test_dir.join("temp2.txt"), test_dir.join(file2_name)).unwrap(); + + // Test that join can handle non-UTF-8 filenames + ts.ucmd() + .arg(file1_name) + .arg(file2_name) + .succeeds() + .stdout_only("a 1 2\n"); + } +}