fold: Adding combining character support (#9328)
Some checks are pending
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Build (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Build/SELinux-Stubs (Non-Linux) (push) Blocked by required conditions
CICD / Safe Traversal Security Check (push) Blocked by required conditions
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Android / Test builds (push) Waiting to run
Benchmarks / Run benchmarks (CodSpeed) (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
OpenBSD / Style and Lint (push) Waiting to run
OpenBSD / Tests (push) Waiting to run
WSL2 / Test (push) Waiting to run

* Adding combining character support for fold

* add fullwidth to the spell ignore list

* addressing comments and cargo fmt fixes

* clippy fixes for test files

---------

Co-authored-by: Christopher Illarionova <drydench@amazon.com>
Co-authored-by: Sylvestre Ledru <sylvestre@debian.org>
This commit is contained in:
Chris Dryden 2025-11-20 03:36:56 -05:00 committed by GitHub
parent 6c44000d08
commit 2a314c7ff3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 44 additions and 0 deletions

View file

@ -434,6 +434,15 @@ fn process_utf8_line<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> URes
let mut iter = line.char_indices().peekable();
while let Some((byte_idx, ch)) = iter.next() {
// Include combining characters with the base character
while let Some(&(_, next_ch)) = iter.peek() {
if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 {
iter.next();
} else {
break;
}
}
let next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len());
if ch == '\n' {

View file

@ -2,6 +2,8 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore fullwidth
use uutests::new_ucmd;
#[test]
@ -597,3 +599,36 @@ fn test_all_tab_advances_at_non_utf8_character() {
.succeeds()
.stdout_is_fixture_bytes("non_utf8_tab_stops_w16.expected");
}
#[test]
fn test_combining_characters_nfc() {
    // Precomposed (NFC) e-acute: a single code point, U+00E9.
    let precomposed = "\u{00E9}";

    // Three characters in, folded at width 2: the break is expected after the
    // second character, and no trailing newline is appended.
    let input = precomposed.repeat(3);
    let expected = format!("{}\n{precomposed}", precomposed.repeat(2));

    new_ucmd!()
        .arg("-w2")
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_combining_characters_nfd() {
    // Decomposed (NFD) e-acute: base letter `e` followed by U+0301
    // (combining acute accent).
    let decomposed = "e\u{0301}";

    // Each base + combining pair is expected to stay together, so with
    // width 2 the first output line holds two pairs and the third pair wraps.
    let input = [decomposed; 3].concat();
    let first_line = [decomposed; 2].concat();
    let expected = format!("{first_line}\n{decomposed}");

    new_ucmd!()
        .arg("-w2")
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
#[test]
fn test_fullwidth_characters() {
    // U+FF45 is the fullwidth form of `e`; it takes 2 columns.
    let wide_e = "\u{FF45}";

    // With width 2 only one fullwidth character fits per line, so the second
    // one is expected on its own line.
    let input = [wide_e, wide_e].concat();
    let expected = [wide_e, "\n", wide_e].concat();

    new_ucmd!()
        .arg("-w2")
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}