nl: allow non-UTF8 section delimiter

2025-12-23 08:47:37 +00:00 · 2025-09-05 16:38:00 +02:00 · 2025-09-05 16:38:00 +02:00 · 93feaccbcf
commit 93feaccbcf
parent ee1b802612
3 changed files with 98 additions and 10 deletions
--- a/src/uu/nl/src/helper.rs
+++ b/src/uu/nl/src/helper.rs
@ -16,11 +16,13 @@ pub fn parse_options(settings: &mut crate::Settings, opts: &clap::ArgMatches) ->
    // This vector holds error messages encountered.
    let mut errs: Vec<String> = vec![];
    settings.renumber = opts.get_flag(options::NO_RENUMBER);
-    if let Some(delimiter) = opts.get_one::<String>(options::SECTION_DELIMITER) {
-        // check whether the delimiter is a single ASCII char (1 byte)
-        // because GNU nl doesn't add a ':' to single non-ASCII chars
+    if let Some(delimiter) = opts.get_one::<OsString>(options::SECTION_DELIMITER) {
+        // GNU nl decides whether a delimiter is a "single character" by its byte length, not its
+        // character count. For a one-byte delimiter, ':' is implied as the second character.
        settings.section_delimiter = if delimiter.len() == 1 {
-            format!("{delimiter}:")
+            let mut delimiter = delimiter.clone();
+            delimiter.push(":");
+            delimiter
        } else {
            delimiter.clone()
        };
--- a/src/uu/nl/src/nl.rs
+++ b/src/uu/nl/src/nl.rs
@ -4,7 +4,7 @@
 // file that was distributed with this source code.

 use clap::{Arg, ArgAction, Command};
-use std::ffi::OsString;
+use std::ffi::{OsStr, OsString};
 use std::fs::File;
 use std::io::{BufRead, BufReader, Read, stdin};
 use std::path::Path;
@ -20,7 +20,7 @@ pub struct Settings {
    body_numbering: NumberingStyle,
    footer_numbering: NumberingStyle,
    // The variable corresponding to -d
-    section_delimiter: String,
+    section_delimiter: OsString,
    // The variables corresponding to the options -v, -i, -l, -w.
    starting_line_number: i64,
    line_increment: i64,
@ -40,7 +40,7 @@ impl Default for Settings {
            header_numbering: NumberingStyle::None,
            body_numbering: NumberingStyle::NonEmpty,
            footer_numbering: NumberingStyle::None,
-            section_delimiter: String::from("\\:"),
+            section_delimiter: OsString::from("\\:"),
            starting_line_number: 1,
            line_increment: 1,
            join_blank_lines: 1,
@ -140,8 +140,8 @@ enum SectionDelimiter {
 impl SectionDelimiter {
    /// A valid section delimiter contains the pattern one to three times,
    /// and nothing else.
-    fn parse(bytes: &[u8], pattern: &str) -> Option<Self> {
-        let pattern = pattern.as_bytes();
+    fn parse(bytes: &[u8], pattern: &OsStr) -> Option<Self> {
+        let pattern = pattern.as_encoded_bytes();

        if bytes.is_empty() || pattern.is_empty() || bytes.len() % pattern.len() != 0 {
            return None;
@ -270,6 +270,7 @@ pub fn uu_app() -> Command {
                .short('d')
                .long(options::SECTION_DELIMITER)
                .help(translate!("nl-help-section-delimiter"))
+                .value_parser(clap::value_parser!(OsString))
                .value_name("CC"),
        )
        .arg(
--- a/tests/by-util/test_nl.rs
+++ b/tests/by-util/test_nl.rs
@ -627,7 +627,50 @@ fn test_section_delimiter() {
 }

 #[test]
-fn test_one_char_section_delimiter_expansion() {
+#[cfg(target_os = "linux")]
+fn test_section_delimiter_non_utf8() { // -d/--section-delimiter must accept a delimiter that is not valid UTF-8
+    use std::{ffi::OsString, os::unix::ffi::OsStringExt}; // OsStringExt::from_vec builds an OsString from raw bytes
+
+    fn create_arg(prefix: &[u8]) -> OsString { // returns `prefix` followed by the two delimiter bytes
+        let section_delimiter = [0xFF, 0xFE]; // 0xFF and 0xFE never occur in valid UTF-8
+        let mut v = prefix.to_vec();
+        v.extend_from_slice(&section_delimiter);
+        OsString::from_vec(v)
+    }
+
+    let short = create_arg(b"-d");
+    let long = create_arg(b"--section-delimiter=");
+
+    for arg in [short, long] { // exercise both the short and the long option spelling
+        let header_section: Vec<u8> =
+            vec![b'a', b'\n', 0xFF, 0xFE, 0xFF, 0xFE, 0xFF, 0xFE, b'\n', b'b']; // delimiter repeated three times
+
+        new_ucmd!()
+            .arg(&arg)
+            .pipe_in(header_section)
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n"); // "b" comes out unnumbered
+
+        let body_section: Vec<u8> = vec![b'a', b'\n', 0xFF, 0xFE, 0xFF, 0xFE, b'\n', b'b']; // delimiter repeated twice
+
+        new_ucmd!()
+            .arg(&arg)
+            .pipe_in(body_section)
+            .succeeds()
+            .stdout_is("     1\ta\n\n     1\tb\n"); // "b" is numbered 1 again
+
+        let footer_section: Vec<u8> = vec![b'a', b'\n', 0xFF, 0xFE, b'\n', b'b']; // delimiter once
+
+        new_ucmd!()
+            .arg(&arg)
+            .pipe_in(footer_section)
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n"); // "b" comes out unnumbered
+    }
+}
+
+#[test]
+fn test_one_char_section_delimiter() {
    for arg in ["-da", "--section-delimiter=a"] {
        new_ucmd!()
            .arg(arg)
@ -649,6 +692,48 @@ fn test_one_char_section_delimiter_expansion() {
    }
 }

+#[test]
+#[cfg(target_os = "linux")]
+fn test_one_byte_section_delimiter() { // a one-byte non-UTF-8 delimiter is matched with ':' as its second byte
+    use std::{ffi::OsString, os::unix::ffi::OsStringExt}; // OsStringExt::from_vec builds an OsString from raw bytes
+
+    fn create_arg(prefix: &[u8]) -> OsString { // returns `prefix` followed by the single byte 0xFF
+        let mut v = prefix.to_vec();
+        v.push(0xFF); // 0xFF never occurs in valid UTF-8
+        OsString::from_vec(v)
+    }
+
+    let short = create_arg(b"-d");
+    let long = create_arg(b"--section-delimiter=");
+
+    for arg in [short, long] { // exercise both the short and the long option spelling
+        let header_section: Vec<u8> =
+            vec![b'a', b'\n', 0xFF, b':', 0xFF, b':', 0xFF, b':', b'\n', b'b']; // 0xFF ':' pair repeated three times
+
+        new_ucmd!()
+            .arg(&arg)
+            .pipe_in(header_section)
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n"); // "b" comes out unnumbered
+
+        let body_section: Vec<u8> = vec![b'a', b'\n', 0xFF, b':', 0xFF, b':', b'\n', b'b']; // 0xFF ':' pair repeated twice
+
+        new_ucmd!()
+            .arg(&arg)
+            .pipe_in(body_section)
+            .succeeds()
+            .stdout_is("     1\ta\n\n     1\tb\n"); // "b" is numbered 1 again
+
+        let footer_section: Vec<u8> = vec![b'a', b'\n', 0xFF, b':', b'\n', b'b']; // 0xFF ':' pair once
+
+        new_ucmd!()
+            .arg(&arg)
+            .pipe_in(footer_section)
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n"); // "b" comes out unnumbered
+    }
+}
+
 #[test]
 fn test_non_ascii_one_char_section_delimiter() {
    for arg in ["-dä", "--section-delimiter=ä"] {