diff --git a/src/uu/nl/src/helper.rs b/src/uu/nl/src/helper.rs index 44f5fa750..6bbec85f4 100644 --- a/src/uu/nl/src/helper.rs +++ b/src/uu/nl/src/helper.rs @@ -16,11 +16,13 @@ pub fn parse_options(settings: &mut crate::Settings, opts: &clap::ArgMatches) -> // This vector holds error messages encountered. let mut errs: Vec<String> = vec![]; settings.renumber = opts.get_flag(options::NO_RENUMBER); - if let Some(delimiter) = opts.get_one::<String>(options::SECTION_DELIMITER) { - // check whether the delimiter is a single ASCII char (1 byte) - // because GNU nl doesn't add a ':' to single non-ASCII chars + if let Some(delimiter) = opts.get_one::<OsString>(options::SECTION_DELIMITER) { + // GNU nl determines whether a delimiter is a "single character" based on byte length, not + // character length. A "single character" implies the second character is a ':'. settings.section_delimiter = if delimiter.len() == 1 { - format!("{delimiter}:") + let mut delimiter = delimiter.clone(); + delimiter.push(":"); + delimiter } else { delimiter.clone() }; diff --git a/src/uu/nl/src/nl.rs b/src/uu/nl/src/nl.rs index 56a390d9c..261d7897f 100644 --- a/src/uu/nl/src/nl.rs +++ b/src/uu/nl/src/nl.rs @@ -4,7 +4,7 @@ // file that was distributed with this source code. use clap::{Arg, ArgAction, Command}; -use std::ffi::OsString; +use std::ffi::{OsStr, OsString}; use std::fs::File; use std::io::{BufRead, BufReader, Read, stdin}; use std::path::Path; @@ -20,7 +20,7 @@ pub struct Settings { body_numbering: NumberingStyle, footer_numbering: NumberingStyle, // The variable corresponding to -d - section_delimiter: String, + section_delimiter: OsString, // The variables corresponding to the options -v, -i, -l, -w. 
starting_line_number: i64, line_increment: i64, @@ -40,7 +40,7 @@ impl Default for Settings { header_numbering: NumberingStyle::None, body_numbering: NumberingStyle::NonEmpty, footer_numbering: NumberingStyle::None, - section_delimiter: String::from("\\:"), + section_delimiter: OsString::from("\\:"), starting_line_number: 1, line_increment: 1, join_blank_lines: 1, @@ -140,8 +140,8 @@ enum SectionDelimiter { impl SectionDelimiter { /// A valid section delimiter contains the pattern one to three times, /// and nothing else. - fn parse(bytes: &[u8], pattern: &str) -> Option<Self> { - let pattern = pattern.as_bytes(); + fn parse(bytes: &[u8], pattern: &OsStr) -> Option<Self> { + let pattern = pattern.as_encoded_bytes(); if bytes.is_empty() || pattern.is_empty() || bytes.len() % pattern.len() != 0 { return None; @@ -270,6 +270,7 @@ pub fn uu_app() -> Command { .short('d') .long(options::SECTION_DELIMITER) .help(translate!("nl-help-section-delimiter")) + .value_parser(clap::value_parser!(OsString)) .value_name("CC"), ) .arg( diff --git a/tests/by-util/test_nl.rs b/tests/by-util/test_nl.rs index 03e98cbcd..953d7bcb3 100644 --- a/tests/by-util/test_nl.rs +++ b/tests/by-util/test_nl.rs @@ -627,7 +627,50 @@ fn test_section_delimiter() { } #[test] -fn test_one_char_section_delimiter_expansion() { +#[cfg(target_os = "linux")] +fn test_section_delimiter_non_utf8() { + use std::{ffi::OsString, os::unix::ffi::OsStringExt}; + + fn create_arg(prefix: &[u8]) -> OsString { + let section_delimiter = [0xFF, 0xFE]; + let mut v = prefix.to_vec(); + v.extend_from_slice(&section_delimiter); + OsString::from_vec(v) + } + + let short = create_arg(b"-d"); + let long = create_arg(b"--section-delimiter="); + + for arg in [short, long] { + let header_section: Vec<u8> = + vec![b'a', b'\n', 0xFF, 0xFE, 0xFF, 0xFE, 0xFF, 0xFE, b'\n', b'b']; + + new_ucmd!() + .arg(&arg) + .pipe_in(header_section) + .succeeds() + .stdout_is(" 1\ta\n\n b\n"); + + let body_section: Vec<u8> = vec![b'a', b'\n', 0xFF, 0xFE, 0xFF, 0xFE, b'\n', 
b'b']; + + new_ucmd!() + .arg(&arg) + .pipe_in(body_section) + .succeeds() + .stdout_is(" 1\ta\n\n 1\tb\n"); + + let footer_section: Vec<u8> = vec![b'a', b'\n', 0xFF, 0xFE, b'\n', b'b']; + + new_ucmd!() + .arg(&arg) + .pipe_in(footer_section) + .succeeds() + .stdout_is(" 1\ta\n\n b\n"); + } +} + +#[test] +fn test_one_char_section_delimiter() { for arg in ["-da", "--section-delimiter=a"] { new_ucmd!() .arg(arg) @@ -649,6 +692,48 @@ fn test_one_char_section_delimiter_expansion() { } } +#[test] +#[cfg(target_os = "linux")] +fn test_one_byte_section_delimiter() { + use std::{ffi::OsString, os::unix::ffi::OsStringExt}; + + fn create_arg(prefix: &[u8]) -> OsString { + let mut v = prefix.to_vec(); + v.push(0xFF); + OsString::from_vec(v) + } + + let short = create_arg(b"-d"); + let long = create_arg(b"--section-delimiter="); + + for arg in [short, long] { + let header_section: Vec<u8> = + vec![b'a', b'\n', 0xFF, b':', 0xFF, b':', 0xFF, b':', b'\n', b'b']; + + new_ucmd!() + .arg(&arg) + .pipe_in(header_section) + .succeeds() + .stdout_is(" 1\ta\n\n b\n"); + + let body_section: Vec<u8> = vec![b'a', b'\n', 0xFF, b':', 0xFF, b':', b'\n', b'b']; + + new_ucmd!() + .arg(&arg) + .pipe_in(body_section) + .succeeds() + .stdout_is(" 1\ta\n\n 1\tb\n"); + + let footer_section: Vec<u8> = vec![b'a', b'\n', 0xFF, b':', b'\n', b'b']; + + new_ucmd!() + .arg(&arg) + .pipe_in(footer_section) + .succeeds() + .stdout_is(" 1\ta\n\n b\n"); + } +} + #[test] fn test_non_ascii_one_char_section_delimiter() { for arg in ["-dä", "--section-delimiter=ä"] {