mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
Fix join to handle non-UTF-8 filenames
This commit is contained in:
parent
6c996865c9
commit
1056ebe0d5
2 changed files with 53 additions and 7 deletions
|
|
@ -413,7 +413,7 @@ impl Line {
|
|||
|
||||
struct State<'a> {
|
||||
key: usize,
|
||||
file_name: &'a str,
|
||||
file_name: &'a OsString,
|
||||
file_num: FileNum,
|
||||
print_unpaired: bool,
|
||||
lines: Split<Box<dyn BufRead + 'a>>,
|
||||
|
|
@ -427,7 +427,7 @@ struct State<'a> {
|
|||
impl<'a> State<'a> {
|
||||
fn new(
|
||||
file_num: FileNum,
|
||||
name: &'a str,
|
||||
name: &'a OsString,
|
||||
stdin: &'a Stdin,
|
||||
key: usize,
|
||||
line_ending: LineEnding,
|
||||
|
|
@ -436,7 +436,8 @@ impl<'a> State<'a> {
|
|||
let file_buf = if name == "-" {
|
||||
Box::new(stdin.lock()) as Box<dyn BufRead>
|
||||
} else {
|
||||
let file = File::open(name).map_err_context(|| format!("{}", name.maybe_quote()))?;
|
||||
let file = File::open(name)
|
||||
.map_err_context(|| format!("{}", name.to_string_lossy().maybe_quote()))?;
|
||||
Box::new(BufReader::new(file)) as Box<dyn BufRead>
|
||||
};
|
||||
|
||||
|
|
@ -639,7 +640,7 @@ impl<'a> State<'a> {
|
|||
&& (input.check_order == CheckOrder::Enabled
|
||||
|| (self.has_unpaired && !self.has_failed))
|
||||
{
|
||||
let err_msg = translate!("join-error-not-sorted", "file" => self.file_name.maybe_quote(), "line_num" => self.line_num, "content" => String::from_utf8_lossy(&line.string));
|
||||
let err_msg = translate!("join-error-not-sorted", "file" => self.file_name.to_string_lossy().maybe_quote(), "line_num" => self.line_num, "content" => String::from_utf8_lossy(&line.string));
|
||||
// This is fatal if the check is enabled.
|
||||
if input.check_order == CheckOrder::Enabled {
|
||||
return Err(JoinError::UnorderedInput(err_msg));
|
||||
|
|
@ -826,8 +827,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
|
||||
let settings = parse_settings(&matches)?;
|
||||
|
||||
let file1 = matches.get_one::<String>("file1").unwrap();
|
||||
let file2 = matches.get_one::<String>("file2").unwrap();
|
||||
let file1 = matches.get_one::<OsString>("file1").unwrap();
|
||||
let file2 = matches.get_one::<OsString>("file2").unwrap();
|
||||
|
||||
if file1 == "-" && file2 == "-" {
|
||||
return Err(USimpleError::new(
|
||||
|
|
@ -951,6 +952,7 @@ pub fn uu_app() -> Command {
|
|||
.required(true)
|
||||
.value_name("FILE1")
|
||||
.value_hint(clap::ValueHint::FilePath)
|
||||
.value_parser(clap::value_parser!(OsString))
|
||||
.hide(true),
|
||||
)
|
||||
.arg(
|
||||
|
|
@ -958,11 +960,17 @@ pub fn uu_app() -> Command {
|
|||
.required(true)
|
||||
.value_name("FILE2")
|
||||
.value_hint(clap::ValueHint::FilePath)
|
||||
.value_parser(clap::value_parser!(OsString))
|
||||
.hide(true),
|
||||
)
|
||||
}
|
||||
|
||||
fn exec<Sep: Separator>(file1: &str, file2: &str, settings: Settings, sep: Sep) -> UResult<()> {
|
||||
fn exec<Sep: Separator>(
|
||||
file1: &OsString,
|
||||
file2: &OsString,
|
||||
settings: Settings,
|
||||
sep: Sep,
|
||||
) -> UResult<()> {
|
||||
let stdin = stdin();
|
||||
|
||||
let mut state1 = State::new(
|
||||
|
|
|
|||
|
|
@ -533,3 +533,41 @@ fn test_full() {
|
|||
.fails()
|
||||
.stderr_contains("No space left on device");
|
||||
}
|
||||
|
||||
/// Checks that `join` accepts two input files whose names are not valid UTF-8
/// and still produces the joined output on stdout.
///
/// Only compiled on Linux (see the `cfg` attribute below).
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
fn test_join_non_utf8_paths() {
|
||||
use std::fs;
|
||||
|
||||
let ts = TestScenario::new(util_name!());
|
||||
let at = &ts.fixtures;
|
||||
|
||||
// Create the files under plain ASCII names and rename them with std::fs afterwards
|
||||
// (stated reason: the test framework doesn't support OsStr for file names — TODO confirm)
|
||||
let test_dir = at.subdir.as_path();
|
||||
|
||||
// Create temporary files with valid names first; both share the join key `a`
|
||||
at.write("temp1.txt", "a 1\n");
|
||||
at.write("temp2.txt", "a 2\n");
|
||||
|
||||
// Rename them to non-UTF-8 names using std::fs
// (the bytes 0xFF and 0xFE never occur in valid UTF-8)
|
||||
let file1_bytes = b"test_\xFF\xFE_1.txt";
|
||||
let file2_bytes = b"test_\xFF\xFE_2.txt";
|
||||
|
||||
// NOTE(review): the function is already gated on target_os = "linux",
// so this inner unix gate looks redundant — confirm before removing.
#[cfg(unix)]
|
||||
{
|
||||
// OsStrExt provides OsStr::from_bytes for building names from raw bytes
use std::os::unix::ffi::OsStrExt;
|
||||
let file1_name = std::ffi::OsStr::from_bytes(file1_bytes);
|
||||
let file2_name = std::ffi::OsStr::from_bytes(file2_bytes);
|
||||
|
||||
fs::rename(test_dir.join("temp1.txt"), test_dir.join(file1_name)).unwrap();
|
||||
fs::rename(test_dir.join("temp2.txt"), test_dir.join(file2_name)).unwrap();
|
||||
|
||||
// Test that join can handle non-UTF-8 filenames:
// the single shared key `a` must yield exactly one joined line
|
||||
ts.ucmd()
|
||||
.arg(file1_name)
|
||||
.arg(file2_name)
|
||||
.succeeds()
|
||||
.stdout_only("a 1 2\n");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue