Merge pull request #8544 from cakebaker/nl_non_utf8_file_content
Some checks are pending
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Code Quality / Style/spelling (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
WSL2 / Test (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run

nl: support files with non-utf8 content
This commit is contained in:
Sylvestre Ledru 2025-09-03 22:41:01 +02:00 committed by GitHub
commit 59b95298f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 57 additions and 23 deletions

View file

@ -9,10 +9,7 @@ use std::fs::File;
use std::io::{BufRead, BufReader, Read, stdin};
use std::path::Path;
use uucore::error::{FromIo, UResult, USimpleError, set_exit_code};
use uucore::translate;
use uucore::LocalizedCommand;
use uucore::{format_usage, show_error};
use uucore::{LocalizedCommand, format_usage, show_error, translate};
mod helper;
@ -79,7 +76,7 @@ enum NumberingStyle {
All,
NonEmpty,
None,
Regex(Box<regex::Regex>),
Regex(Box<regex::bytes::Regex>),
}
impl TryFrom<&str> for NumberingStyle {
@ -90,7 +87,7 @@ impl TryFrom<&str> for NumberingStyle {
"a" => Ok(Self::All),
"t" => Ok(Self::NonEmpty),
"n" => Ok(Self::None),
_ if s.starts_with('p') => match regex::Regex::new(&s[1..]) {
_ if s.starts_with('p') => match regex::bytes::Regex::new(&s[1..]) {
Ok(re) => Ok(Self::Regex(Box::new(re))),
Err(_) => Err(translate!("nl-error-invalid-regex")),
},
@ -143,19 +140,30 @@ enum SectionDelimiter {
impl SectionDelimiter {
/// A valid section delimiter contains the pattern one to three times,
/// and nothing else.
fn parse(s: &str, pattern: &str) -> Option<Self> {
if s.is_empty() || pattern.is_empty() {
fn parse(bytes: &[u8], pattern: &str) -> Option<Self> {
let pattern = pattern.as_bytes();
if bytes.is_empty() || pattern.is_empty() || bytes.len() % pattern.len() != 0 {
return None;
}
let pattern_count = s.matches(pattern).count();
let is_length_ok = pattern_count * pattern.len() == s.len();
let count = bytes.len() / pattern.len();
if !(1..=3).contains(&count) {
return None;
}
match (pattern_count, is_length_ok) {
(3, true) => Some(Self::Header),
(2, true) => Some(Self::Body),
(1, true) => Some(Self::Footer),
_ => None,
if bytes
.chunks_exact(pattern.len())
.all(|chunk| chunk == pattern)
{
match count {
1 => Some(Self::Footer),
2 => Some(Self::Body),
3 => Some(Self::Header),
_ => unreachable!(),
}
} else {
None
}
}
}
@ -338,9 +346,21 @@ pub fn uu_app() -> Command {
/// `nl` implements the main functionality for an individual buffer.
fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings) -> UResult<()> {
let mut current_numbering_style = &settings.body_numbering;
let mut line = Vec::new();
for line in reader.lines() {
let line = line.map_err_context(|| translate!("nl-error-could-not-read-line"))?;
loop {
line.clear();
// reads up to and including b'\n'; returns 0 on EOF
let n = reader
.read_until(b'\n', &mut line)
.map_err_context(|| translate!("nl-error-could-not-read-line"))?;
if n == 0 {
break;
}
if line.last().copied() == Some(b'\n') {
line.pop();
}
if line.is_empty() {
stats.consecutive_empty_lines += 1;
@ -387,11 +407,12 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
));
};
println!(
"{}{}{line}",
"{}{}{}",
settings
.number_format
.format(line_number, settings.number_width),
settings.number_separator.to_string_lossy(),
String::from_utf8_lossy(&line),
);
// update line number for the potential next line
match line_number.checked_add(settings.line_increment) {
@ -400,7 +421,7 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
}
} else {
let spaces = " ".repeat(settings.number_width + 1);
println!("{spaces}{line}");
println!("{spaces}{}", String::from_utf8_lossy(&line));
}
}
}

View file

@ -4,10 +4,7 @@
// file that was distributed with this source code.
//
// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää
use uutests::at_and_ucmd;
use uutests::new_ucmd;
use uutests::util::TestScenario;
use uutests::util_name;
use uutests::{at_and_ucmd, new_ucmd, util::TestScenario, util_name};
#[test]
#[cfg(target_os = "linux")]
@ -702,3 +699,19 @@ fn test_directory_as_input() {
.stderr_is(format!("nl: {dir}: Is a directory\n"))
.stdout_contains(content);
}
/// `nl` must number the lines of a file whose content is not valid UTF-8
/// instead of failing on it. The expected output contains the invalid bytes
/// in their lossy UTF-8 form.
#[test]
fn test_file_with_non_utf8_content() {
    let (at, mut ucmd) = at_and_ucmd!();

    let filename = "file";
    let invalid_utf8: &[u8] = b"\xFF\xFE";
    // A valid line, a line of invalid UTF-8 bytes, and a last line without a
    // trailing newline.
    let content: &[u8] = b"a\n\xFF\xFE\nb";
    at.write_bytes(filename, content);

    // With default options the line number is right-justified in a field of
    // six columns and followed by a TAB, so each expected line starts with
    // five spaces before the single-digit number.
    let rendered = String::from_utf8_lossy(invalid_utf8);
    ucmd.arg(filename)
        .succeeds()
        .stdout_is(format!("     1\ta\n     2\t{rendered}\n     3\tb\n"));
}