mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
nl: allow non-UTF8 section delimiter
This commit is contained in:
parent
ee1b802612
commit
93feaccbcf
3 changed files with 98 additions and 10 deletions
|
|
@@ -16,11 +16,13 @@ pub fn parse_options(settings: &mut crate::Settings, opts: &clap::ArgMatches) ->
|
|||
// This vector holds error messages encountered.
|
||||
let mut errs: Vec<String> = vec![];
|
||||
settings.renumber = opts.get_flag(options::NO_RENUMBER);
|
||||
if let Some(delimiter) = opts.get_one::<String>(options::SECTION_DELIMITER) {
|
||||
// check whether the delimiter is a single ASCII char (1 byte)
|
||||
// because GNU nl doesn't add a ':' to single non-ASCII chars
|
||||
if let Some(delimiter) = opts.get_one::<OsString>(options::SECTION_DELIMITER) {
|
||||
// GNU nl determines whether a delimiter is a "single character" based on byte length, not
|
||||
// character length. A "single character" implies the second character is a ':'.
|
||||
settings.section_delimiter = if delimiter.len() == 1 {
|
||||
format!("{delimiter}:")
|
||||
let mut delimiter = delimiter.clone();
|
||||
delimiter.push(":");
|
||||
delimiter
|
||||
} else {
|
||||
delimiter.clone()
|
||||
};
|
||||
|
|
|
|||
|
|
@@ -4,7 +4,7 @@
|
|||
// file that was distributed with this source code.
|
||||
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use std::ffi::OsString;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Read, stdin};
|
||||
use std::path::Path;
|
||||
|
|
@@ -20,7 +20,7 @@ pub struct Settings {
|
|||
body_numbering: NumberingStyle,
|
||||
footer_numbering: NumberingStyle,
|
||||
// The variable corresponding to -d
|
||||
section_delimiter: String,
|
||||
section_delimiter: OsString,
|
||||
// The variables corresponding to the options -v, -i, -l, -w.
|
||||
starting_line_number: i64,
|
||||
line_increment: i64,
|
||||
|
|
@@ -40,7 +40,7 @@ impl Default for Settings {
|
|||
header_numbering: NumberingStyle::None,
|
||||
body_numbering: NumberingStyle::NonEmpty,
|
||||
footer_numbering: NumberingStyle::None,
|
||||
section_delimiter: String::from("\\:"),
|
||||
section_delimiter: OsString::from("\\:"),
|
||||
starting_line_number: 1,
|
||||
line_increment: 1,
|
||||
join_blank_lines: 1,
|
||||
|
|
@@ -140,8 +140,8 @@ enum SectionDelimiter {
|
|||
impl SectionDelimiter {
|
||||
/// A valid section delimiter contains the pattern one to three times,
|
||||
/// and nothing else.
|
||||
fn parse(bytes: &[u8], pattern: &str) -> Option<Self> {
|
||||
let pattern = pattern.as_bytes();
|
||||
fn parse(bytes: &[u8], pattern: &OsStr) -> Option<Self> {
|
||||
let pattern = pattern.as_encoded_bytes();
|
||||
|
||||
if bytes.is_empty() || pattern.is_empty() || bytes.len() % pattern.len() != 0 {
|
||||
return None;
|
||||
|
|
@@ -270,6 +270,7 @@ pub fn uu_app() -> Command {
|
|||
.short('d')
|
||||
.long(options::SECTION_DELIMITER)
|
||||
.help(translate!("nl-help-section-delimiter"))
|
||||
.value_parser(clap::value_parser!(OsString))
|
||||
.value_name("CC"),
|
||||
)
|
||||
.arg(
|
||||
|
|
|
|||
|
|
@@ -627,7 +627,50 @@ fn test_section_delimiter() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_one_char_section_delimiter_expansion() {
|
||||
#[cfg(target_os = "linux")]
|
||||
fn test_section_delimiter_non_utf8() {
|
||||
use std::{ffi::OsString, os::unix::ffi::OsStringExt};
|
||||
|
||||
fn create_arg(prefix: &[u8]) -> OsString {
|
||||
let section_delimiter = [0xFF, 0xFE];
|
||||
let mut v = prefix.to_vec();
|
||||
v.extend_from_slice(&section_delimiter);
|
||||
OsString::from_vec(v)
|
||||
}
|
||||
|
||||
let short = create_arg(b"-d");
|
||||
let long = create_arg(b"--section-delimiter=");
|
||||
|
||||
for arg in [short, long] {
|
||||
let header_section: Vec<u8> =
|
||||
vec![b'a', b'\n', 0xFF, 0xFE, 0xFF, 0xFE, 0xFF, 0xFE, b'\n', b'b'];
|
||||
|
||||
new_ucmd!()
|
||||
.arg(&arg)
|
||||
.pipe_in(header_section)
|
||||
.succeeds()
|
||||
.stdout_is(" 1\ta\n\n b\n");
|
||||
|
||||
let body_section: Vec<u8> = vec![b'a', b'\n', 0xFF, 0xFE, 0xFF, 0xFE, b'\n', b'b'];
|
||||
|
||||
new_ucmd!()
|
||||
.arg(&arg)
|
||||
.pipe_in(body_section)
|
||||
.succeeds()
|
||||
.stdout_is(" 1\ta\n\n 1\tb\n");
|
||||
|
||||
let footer_section: Vec<u8> = vec![b'a', b'\n', 0xFF, 0xFE, b'\n', b'b'];
|
||||
|
||||
new_ucmd!()
|
||||
.arg(&arg)
|
||||
.pipe_in(footer_section)
|
||||
.succeeds()
|
||||
.stdout_is(" 1\ta\n\n b\n");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_one_char_section_delimiter() {
|
||||
for arg in ["-da", "--section-delimiter=a"] {
|
||||
new_ucmd!()
|
||||
.arg(arg)
|
||||
|
|
@@ -649,6 +692,48 @@ fn test_one_char_section_delimiter_expansion() {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
fn test_one_byte_section_delimiter() {
|
||||
use std::{ffi::OsString, os::unix::ffi::OsStringExt};
|
||||
|
||||
fn create_arg(prefix: &[u8]) -> OsString {
|
||||
let mut v = prefix.to_vec();
|
||||
v.push(0xFF);
|
||||
OsString::from_vec(v)
|
||||
}
|
||||
|
||||
let short = create_arg(b"-d");
|
||||
let long = create_arg(b"--section-delimiter=");
|
||||
|
||||
for arg in [short, long] {
|
||||
let header_section: Vec<u8> =
|
||||
vec![b'a', b'\n', 0xFF, b':', 0xFF, b':', 0xFF, b':', b'\n', b'b'];
|
||||
|
||||
new_ucmd!()
|
||||
.arg(&arg)
|
||||
.pipe_in(header_section)
|
||||
.succeeds()
|
||||
.stdout_is(" 1\ta\n\n b\n");
|
||||
|
||||
let body_section: Vec<u8> = vec![b'a', b'\n', 0xFF, b':', 0xFF, b':', b'\n', b'b'];
|
||||
|
||||
new_ucmd!()
|
||||
.arg(&arg)
|
||||
.pipe_in(body_section)
|
||||
.succeeds()
|
||||
.stdout_is(" 1\ta\n\n 1\tb\n");
|
||||
|
||||
let footer_section: Vec<u8> = vec![b'a', b'\n', 0xFF, b':', b'\n', b'b'];
|
||||
|
||||
new_ucmd!()
|
||||
.arg(&arg)
|
||||
.pipe_in(footer_section)
|
||||
.succeeds()
|
||||
.stdout_is(" 1\ta\n\n b\n");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_ascii_one_char_section_delimiter() {
|
||||
for arg in ["-dä", "--section-delimiter=ä"] {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue