mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
Merge pull request #8544 from cakebaker/nl_non_utf8_file_content
Some checks are pending
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Code Quality / Style/spelling (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
WSL2 / Test (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
Some checks are pending
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Build (push) Blocked by required conditions
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Code Quality / Style/spelling (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
WSL2 / Test (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
nl: support files with non-utf8 content
This commit is contained in:
commit
59b95298f7
2 changed files with 57 additions and 23 deletions
|
|
@ -9,10 +9,7 @@ use std::fs::File;
|
|||
use std::io::{BufRead, BufReader, Read, stdin};
|
||||
use std::path::Path;
|
||||
use uucore::error::{FromIo, UResult, USimpleError, set_exit_code};
|
||||
use uucore::translate;
|
||||
|
||||
use uucore::LocalizedCommand;
|
||||
use uucore::{format_usage, show_error};
|
||||
use uucore::{LocalizedCommand, format_usage, show_error, translate};
|
||||
|
||||
mod helper;
|
||||
|
||||
|
|
@ -79,7 +76,7 @@ enum NumberingStyle {
|
|||
All,
|
||||
NonEmpty,
|
||||
None,
|
||||
Regex(Box<regex::Regex>),
|
||||
Regex(Box<regex::bytes::Regex>),
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for NumberingStyle {
|
||||
|
|
@ -90,7 +87,7 @@ impl TryFrom<&str> for NumberingStyle {
|
|||
"a" => Ok(Self::All),
|
||||
"t" => Ok(Self::NonEmpty),
|
||||
"n" => Ok(Self::None),
|
||||
_ if s.starts_with('p') => match regex::Regex::new(&s[1..]) {
|
||||
_ if s.starts_with('p') => match regex::bytes::Regex::new(&s[1..]) {
|
||||
Ok(re) => Ok(Self::Regex(Box::new(re))),
|
||||
Err(_) => Err(translate!("nl-error-invalid-regex")),
|
||||
},
|
||||
|
|
@ -143,19 +140,30 @@ enum SectionDelimiter {
|
|||
impl SectionDelimiter {
|
||||
/// A valid section delimiter contains the pattern one to three times,
|
||||
/// and nothing else.
|
||||
fn parse(s: &str, pattern: &str) -> Option<Self> {
|
||||
if s.is_empty() || pattern.is_empty() {
|
||||
fn parse(bytes: &[u8], pattern: &str) -> Option<Self> {
|
||||
let pattern = pattern.as_bytes();
|
||||
|
||||
if bytes.is_empty() || pattern.is_empty() || bytes.len() % pattern.len() != 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let pattern_count = s.matches(pattern).count();
|
||||
let is_length_ok = pattern_count * pattern.len() == s.len();
|
||||
let count = bytes.len() / pattern.len();
|
||||
if !(1..=3).contains(&count) {
|
||||
return None;
|
||||
}
|
||||
|
||||
match (pattern_count, is_length_ok) {
|
||||
(3, true) => Some(Self::Header),
|
||||
(2, true) => Some(Self::Body),
|
||||
(1, true) => Some(Self::Footer),
|
||||
_ => None,
|
||||
if bytes
|
||||
.chunks_exact(pattern.len())
|
||||
.all(|chunk| chunk == pattern)
|
||||
{
|
||||
match count {
|
||||
1 => Some(Self::Footer),
|
||||
2 => Some(Self::Body),
|
||||
3 => Some(Self::Header),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -338,9 +346,21 @@ pub fn uu_app() -> Command {
|
|||
/// `nl` implements the main functionality for an individual buffer.
|
||||
fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings) -> UResult<()> {
|
||||
let mut current_numbering_style = &settings.body_numbering;
|
||||
let mut line = Vec::new();
|
||||
|
||||
for line in reader.lines() {
|
||||
let line = line.map_err_context(|| translate!("nl-error-could-not-read-line"))?;
|
||||
loop {
|
||||
line.clear();
|
||||
// reads up to and including b'\n'; returns 0 on EOF
|
||||
let n = reader
|
||||
.read_until(b'\n', &mut line)
|
||||
.map_err_context(|| translate!("nl-error-could-not-read-line"))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
if line.last().copied() == Some(b'\n') {
|
||||
line.pop();
|
||||
}
|
||||
|
||||
if line.is_empty() {
|
||||
stats.consecutive_empty_lines += 1;
|
||||
|
|
@ -387,11 +407,12 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
|
|||
));
|
||||
};
|
||||
println!(
|
||||
"{}{}{line}",
|
||||
"{}{}{}",
|
||||
settings
|
||||
.number_format
|
||||
.format(line_number, settings.number_width),
|
||||
settings.number_separator.to_string_lossy(),
|
||||
String::from_utf8_lossy(&line),
|
||||
);
|
||||
// update line number for the potential next line
|
||||
match line_number.checked_add(settings.line_increment) {
|
||||
|
|
@ -400,7 +421,7 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
|
|||
}
|
||||
} else {
|
||||
let spaces = " ".repeat(settings.number_width + 1);
|
||||
println!("{spaces}{line}");
|
||||
println!("{spaces}{}", String::from_utf8_lossy(&line));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,10 +4,7 @@
|
|||
// file that was distributed with this source code.
|
||||
//
|
||||
// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää
|
||||
use uutests::at_and_ucmd;
|
||||
use uutests::new_ucmd;
|
||||
use uutests::util::TestScenario;
|
||||
use uutests::util_name;
|
||||
use uutests::{at_and_ucmd, new_ucmd, util::TestScenario, util_name};
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
|
|
@ -702,3 +699,19 @@ fn test_directory_as_input() {
|
|||
.stderr_is(format!("nl: {dir}: Is a directory\n"))
|
||||
.stdout_contains(content);
|
||||
}
|
||||
|
||||
/// `nl` must number the lines of a file whose content is not valid UTF-8
/// instead of failing on it.
#[test]
fn test_file_with_non_utf8_content() {
    let (at, mut ucmd) = at_and_ucmd!();

    let filename = "file";
    // The middle line consists of two bytes that are not valid UTF-8, and the
    // last line has no trailing newline.
    let content: &[u8] = b"a\n\xFF\xFE\nb";
    let invalid_utf8: &[u8] = b"\xFF\xFE";

    at.write_bytes(filename, content);

    // With default settings the line number is right-aligned in a six-column
    // field and followed by a tab, hence the five padding spaces before each
    // single-digit number. The invalid bytes are expected in their lossy
    // (replacement character) form, and every output line ends in a newline.
    ucmd.arg(filename).succeeds().stdout_is(format!(
        "     1\ta\n     2\t{}\n     3\tb\n",
        String::from_utf8_lossy(invalid_utf8)
    ));
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue