fold: keep zero-width (combining) characters attached to their base character

In process_utf8_line, after reading a character, every immediately following
character whose unicode-width is 0 is consumed before next_idx is computed, so
the base character and its zero-width followers are handled as one unit.

Tests (all run with -w2): precomposed e-acute (NFC), "e" + U+0301 (NFD), and
the fullwidth letter U+FF45.

diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs
index f14ed3cf0..a2ddbed6a 100644
--- a/src/uu/fold/src/fold.rs
+++ b/src/uu/fold/src/fold.rs
@@ -434,6 +434,15 @@ fn process_utf8_line<W: Write>(line: &str, ctx: &mut FoldContext<'_, W>) -> URes
     let mut iter = line.char_indices().peekable();
 
     while let Some((byte_idx, ch)) = iter.next() {
+        // Include combining characters with the base character
+        while let Some(&(_, next_ch)) = iter.peek() {
+            if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 {
+                iter.next();
+            } else {
+                break;
+            }
+        }
+
         let next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len());
 
         if ch == '\n' {
diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs
index 04072ab15..9497044c9 100644
--- a/tests/by-util/test_fold.rs
+++ b/tests/by-util/test_fold.rs
@@ -2,6 +2,8 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
+// spell-checker:ignore fullwidth
+
 use uutests::new_ucmd;
 
 #[test]
@@ -597,3 +599,36 @@ fn test_all_tab_advances_at_non_utf8_character() {
         .succeeds()
         .stdout_is_fixture_bytes("non_utf8_tab_stops_w16.expected");
 }
+
+#[test]
+fn test_combining_characters_nfc() {
+    // e acute NFC form (single character)
+    let e_acute_nfc = "\u{00E9}"; // é as single character
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in(format!("{e_acute_nfc}{e_acute_nfc}{e_acute_nfc}"))
+        .succeeds()
+        .stdout_is(format!("{e_acute_nfc}{e_acute_nfc}\n{e_acute_nfc}"));
+}
+
+#[test]
+fn test_combining_characters_nfd() {
+    // e acute NFD form (base + combining acute)
+    let e_acute_nfd = "e\u{0301}"; // e + combining acute accent
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in(format!("{e_acute_nfd}{e_acute_nfd}{e_acute_nfd}"))
+        .succeeds()
+        .stdout_is(format!("{e_acute_nfd}{e_acute_nfd}\n{e_acute_nfd}"));
+}
+
+#[test]
+fn test_fullwidth_characters() {
+    // e fullwidth (takes 2 columns)
+    let e_fullwidth = "\u{FF45}"; // ｅ
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in(format!("{e_fullwidth}{e_fullwidth}"))
+        .succeeds()
+        .stdout_is(format!("{e_fullwidth}\n{e_fullwidth}"));
+}