From ef40c472a38948fe1356e3077890b030f7d61877 Mon Sep 17 00:00:00 2001 From: Julian Andres Klode Date: Tue, 5 Aug 2025 15:47:49 +0200 Subject: [PATCH 1/2] tr: fix high memory use, possible heap exhaustion Read the input into a statically sized buffer - 8192 bytes, matching GNU - instead of reading until the end of the line, as reading until the end of the line in a file with no end of line would result in reading the entire file into memory. Confusingly, GNU tr seems to write the 8192 bytes in two chunks of 1024 and 7168 bytes, but I can't figure out why it would do that; I don't see any line buffering in GNU tr. Bug-Ubuntu: https://launchpad.net/bugs/2119520 --- src/uu/tr/src/operation.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index baf8f70aa..af790b931 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -672,15 +672,17 @@ where R: BufRead, W: Write, { - let mut buf = Vec::new(); + let mut buf = [0; 8192]; let mut output_buf = Vec::new(); - while let Ok(length) = input.read_until(b'\n', &mut buf) { + while let Ok(length) = input.read(&mut buf[..]) { if length == 0 { break; // EOF reached } - let filtered = buf.iter().filter_map(|&c| translator.translate(c)); + let filtered = buf[..length] + .iter() + .filter_map(|&c| translator.translate(c)); output_buf.extend(filtered); #[cfg(not(target_os = "windows"))] @@ -698,7 +700,6 @@ where } } - buf.clear(); output_buf.clear(); } From c93b9ed6beb29c97ccabbdf0646fb8515198de6d Mon Sep 17 00:00:00 2001 From: Julian Andres Klode Date: Tue, 5 Aug 2025 16:02:51 +0200 Subject: [PATCH 2/2] tr: use an unbuffered stdout Our stdin that we transform is already buffered (using 8192-byte buffers in the previous commit), so avoid buffering our output needlessly. 
This effectively changes the code to write complete lines immediately. For example, in `( echo a; sleep 1 ) | tr a b` we receive read(0, "a\n", 8192) = 2 write(1, "b\n", 2) = 2 read(0, "", 8192) = 0 instead of read(0, "a\n", 8192) = 2 read(0, "", 8192) = 0 write(1, "b\n", 2) = 2 which matches the GNU coreutils behavior. --- src/uu/tr/src/tr.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 9bf8bbbec..5e5316dbc 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -12,7 +12,7 @@ use operation::{ translate_input, }; use std::ffi::OsString; -use std::io::{BufWriter, Write, stdin, stdout}; +use std::io::{Write, stdin, stdout}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::fs::is_stdin_directory; @@ -107,7 +107,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let stdin = stdin(); let mut locked_stdin = stdin.lock(); - let mut buffered_stdout = BufWriter::new(stdout().lock()); + let mut locked_stdout = stdout().lock(); // According to the man page: translating only happens if deleting or if a second set is given let translating = !delete_flag && sets.len() > 1; @@ -131,34 +131,34 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let delete_op = DeleteOperation::new(set1); let squeeze_op = SqueezeOperation::new(set2); let op = delete_op.chain(squeeze_op); - translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; + translate_input(&mut locked_stdin, &mut locked_stdout, op)?; } else { let op = DeleteOperation::new(set1); - translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; + translate_input(&mut locked_stdin, &mut locked_stdout, op)?; } } else if squeeze_flag { if sets_len == 1 { let op = SqueezeOperation::new(set1); - translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; + translate_input(&mut locked_stdin, &mut locked_stdout, op)?; } else { let translate_op = 
TranslateOperation::new(set1, set2.clone())?; let squeeze_op = SqueezeOperation::new(set2); let op = translate_op.chain(squeeze_op); - translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; + translate_input(&mut locked_stdin, &mut locked_stdout, op)?; } } else { let op = TranslateOperation::new(set1, set2)?; - translate_input(&mut locked_stdin, &mut buffered_stdout, op)?; + translate_input(&mut locked_stdin, &mut locked_stdout, op)?; } #[cfg(not(target_os = "windows"))] - buffered_stdout + locked_stdout .flush() .map_err_context(|| translate!("tr-error-write-error"))?; // SIGPIPE is not available on Windows. #[cfg(target_os = "windows")] - match buffered_stdout.flush() { + match locked_stdout.flush() { Ok(()) => {} Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => std::process::exit(13), Err(err) => return Err(err.map_err_context(|| translate!("tr-error-write-error"))),