mirror of
https://github.com/uutils/coreutils.git
synced 2025-07-07 21:45:01 +00:00
fmt: fix the last two gnu issues
This commit is contained in:
parent
56ce0e28ad
commit
2df7f01c15
3 changed files with 95 additions and 10 deletions
|
@ -2,6 +2,9 @@
|
|||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore plass samp
|
||||
|
||||
use uutests::new_ucmd;
|
||||
use uutests::util::TestScenario;
|
||||
use uutests::util_name;
|
||||
|
@ -303,3 +306,73 @@ fn prefix_equal_skip_prefix_equal_two() {
|
|||
.stdout_is_fixture("prefixed-one-word-per-line_p=_P=2.txt");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fmt_unicode_whitespace_handling() {
|
||||
// Character classification fix: Test that Unicode whitespace characters like non-breaking space
|
||||
// are NOT treated as whitespace by fmt, maintaining GNU fmt compatibility.
|
||||
// GNU fmt only recognizes ASCII whitespace (space, tab, newline, etc.) and excludes
|
||||
// Unicode whitespace characters to ensure consistent formatting behavior.
|
||||
// This prevents regression of the character classification fix
|
||||
let non_breaking_space = "\u{00A0}"; // U+00A0 NO-BREAK SPACE
|
||||
let figure_space = "\u{2007}"; // U+2007 FIGURE SPACE
|
||||
let narrow_no_break_space = "\u{202F}"; // U+202F NARROW NO-BREAK SPACE
|
||||
|
||||
// When fmt splits on width=1, these characters should NOT cause line breaks
|
||||
// because they should not be considered whitespace
|
||||
for (name, char) in [
|
||||
("non-breaking space", non_breaking_space),
|
||||
("figure space", figure_space),
|
||||
("narrow no-break space", narrow_no_break_space),
|
||||
] {
|
||||
let input = format!("={char}=");
|
||||
let result = new_ucmd!()
|
||||
.args(&["-s", "-w1"])
|
||||
.pipe_in(input.as_bytes())
|
||||
.succeeds();
|
||||
|
||||
// Should be 1 line since the Unicode char is not treated as whitespace
|
||||
assert_eq!(
|
||||
result.stdout_str().lines().count(),
|
||||
1,
|
||||
"Failed for {name}: Unicode character should not be treated as whitespace"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fmt_knuth_plass_line_breaking() {
|
||||
// Line breaking algorithm improvements: Test the enhanced Knuth-Plass optimal line breaking
|
||||
// algorithm that better handles sentence boundaries, word positioning constraints,
|
||||
// and produces more natural line breaks for complex text formatting.
|
||||
// This prevents regression of the line breaking algorithm improvements
|
||||
let input = "@command{fmt} prefers breaking lines at the end of a sentence, and tries to\n\
|
||||
avoid line breaks after the first word of a sentence or before the last word\n\
|
||||
of a sentence. A @dfn{sentence break} is defined as either the end of a\n\
|
||||
paragraph or a word ending in any of @samp{.?!}, followed by two spaces or end\n\
|
||||
of line, ignoring any intervening parentheses or quotes. Like @TeX{},\n\
|
||||
@command{fmt} reads entire ''paragraphs'' before choosing line breaks; the\n\
|
||||
algorithm is a variant of that given by\n\
|
||||
Donald E. Knuth and Michael F. Plass\n\
|
||||
in ''Breaking Paragraphs Into Lines'',\n\
|
||||
@cite{Software---Practice & Experience}\n\
|
||||
@b{11}, 11 (November 1981), 1119--1184.";
|
||||
|
||||
let expected = "@command{fmt} prefers breaking lines at the end of a sentence,\n\
|
||||
and tries to avoid line breaks after the first word of a sentence\n\
|
||||
or before the last word of a sentence. A @dfn{sentence break}\n\
|
||||
is defined as either the end of a paragraph or a word ending\n\
|
||||
in any of @samp{.?!}, followed by two spaces or end of line,\n\
|
||||
ignoring any intervening parentheses or quotes. Like @TeX{},\n\
|
||||
@command{fmt} reads entire ''paragraphs'' before choosing line\n\
|
||||
breaks; the algorithm is a variant of that given by Donald\n\
|
||||
E. Knuth and Michael F. Plass in ''Breaking Paragraphs Into\n\
|
||||
Lines'', @cite{Software---Practice & Experience} @b{11}, 11\n\
|
||||
(November 1981), 1119--1184.\n";
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["-g", "60", "-w", "72"])
|
||||
.pipe_in(input)
|
||||
.succeeds()
|
||||
.stdout_is(expected);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue