mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
nl: preserve raw bytes in output instead of using from_utf8_lossy
This commit is contained in:
parent
c085cd1c21
commit
93c8d5439b
2 changed files with 40 additions and 29 deletions
|
|
@ -345,6 +345,13 @@ pub fn uu_app() -> Command {
|
|||
)
|
||||
}
|
||||
|
||||
/// Emits a single output record to `writer`: `prefix`, then `line`, then `\n`.
///
/// Both `prefix` and `line` are written as raw bytes with no UTF-8 validation or
/// conversion, so arbitrary byte sequences pass through unchanged.
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`; chunks after the failing
/// one are not written.
fn write_line(writer: &mut impl Write, prefix: &[u8], line: &[u8]) -> std::io::Result<()> {
    for chunk in [prefix, line, &b"\n"[..]] {
        writer.write_all(chunk)?;
    }
    Ok(())
}
|
||||
|
||||
/// `nl` implements the main functionality for an individual buffer.
|
||||
fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings) -> UResult<()> {
|
||||
let mut writer = BufWriter::new(stdout());
|
||||
|
|
@ -409,24 +416,17 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
|
|||
translate!("nl-error-line-number-overflow"),
|
||||
));
|
||||
};
|
||||
writeln!(
|
||||
writer,
|
||||
"{}{}{}",
|
||||
settings
|
||||
.number_format
|
||||
.format(line_number, settings.number_width),
|
||||
settings.number_separator.to_string_lossy(),
|
||||
String::from_utf8_lossy(&line),
|
||||
)
|
||||
.map_err_context(|| translate!("nl-error-could-not-write"))?;
|
||||
// update line number for the potential next line
|
||||
match line_number.checked_add(settings.line_increment) {
|
||||
Some(new_line_number) => stats.line_number = Some(new_line_number),
|
||||
None => stats.line_number = None, // overflow
|
||||
}
|
||||
let mut prefix = settings
|
||||
.number_format
|
||||
.format(line_number, settings.number_width)
|
||||
.into_bytes();
|
||||
prefix.extend_from_slice(settings.number_separator.as_encoded_bytes());
|
||||
write_line(&mut writer, &prefix, &line)
|
||||
.map_err_context(|| translate!("nl-error-could-not-write"))?;
|
||||
stats.line_number = line_number.checked_add(settings.line_increment);
|
||||
} else {
|
||||
let spaces = " ".repeat(settings.number_width + 1);
|
||||
writeln!(writer, "{spaces}{}", String::from_utf8_lossy(&line))
|
||||
let prefix = " ".repeat(settings.number_width + 1);
|
||||
write_line(&mut writer, prefix.as_bytes(), &line)
|
||||
.map_err_context(|| translate!("nl-error-could-not-write"))?;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
//
|
||||
// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää
|
||||
// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää févr
|
||||
use uutests::{at_and_ucmd, new_ucmd, util::TestScenario, util_name};
|
||||
|
||||
#[test]
|
||||
|
|
@ -209,23 +209,24 @@ fn test_number_separator() {
|
|||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
fn test_number_separator_non_utf8() {
|
||||
use std::{
|
||||
ffi::{OsStr, OsString},
|
||||
os::unix::ffi::{OsStrExt, OsStringExt},
|
||||
};
|
||||
use std::{ffi::OsString, os::unix::ffi::OsStringExt};
|
||||
|
||||
let separator_bytes = [0xFF, 0xFE];
|
||||
let mut v = b"--number-separator=".to_vec();
|
||||
v.extend_from_slice(&separator_bytes);
|
||||
|
||||
let arg = OsString::from_vec(v);
|
||||
let separator = OsStr::from_bytes(&separator_bytes);
|
||||
|
||||
// Raw bytes should be preserved in the separator output
|
||||
let mut expected = b" 1".to_vec();
|
||||
expected.extend_from_slice(&separator_bytes);
|
||||
expected.extend_from_slice(b"test\n");
|
||||
|
||||
new_ucmd!()
|
||||
.arg(arg)
|
||||
.pipe_in("test")
|
||||
.succeeds()
|
||||
.stdout_is(format!(" 1{}test\n", separator.to_string_lossy()));
|
||||
.stdout_is_bytes(expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -791,14 +792,24 @@ fn test_file_with_non_utf8_content() {
|
|||
|
||||
let filename = "file";
|
||||
let content: &[u8] = b"a\n\xFF\xFE\nb";
|
||||
let invalid_utf8: &[u8] = b"\xFF\xFE";
|
||||
|
||||
at.write_bytes(filename, content);
|
||||
|
||||
ucmd.arg(filename).succeeds().stdout_is(format!(
|
||||
" 1\ta\n 2\t{}\n 3\tb\n",
|
||||
String::from_utf8_lossy(invalid_utf8)
|
||||
));
|
||||
// Raw bytes should be preserved in output (not converted to UTF-8 replacement chars)
|
||||
let expected: Vec<u8> = b" 1\ta\n 2\t\xFF\xFE\n 3\tb\n".to_vec();
|
||||
ucmd.arg(filename).succeeds().stdout_is_bytes(expected);
|
||||
}
|
||||
|
||||
#[test]
fn test_stdin_non_utf8_preserved() {
    // Bytes that are not valid UTF-8 must reach stdout unchanged rather than being
    // swapped for replacement characters; this matters for locale compatibility.
    // 0xE9 is "é" in Latin-1 and is not a valid UTF-8 sequence in this position.
    let latin1_input = b"f\xe9vr.\n".to_vec(); // "févr." in Latin-1
    let numbered_output = b" 1\tf\xe9vr.\n".to_vec();

    new_ucmd!()
        .pipe_in(latin1_input)
        .succeeds()
        .stdout_is_bytes(numbered_output);
}
|
||||
|
||||
// Regression tests for issue #9132: repeated flags should use last value
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue