Add legacy +POS/-POS handling in sort to pass GNU sort-field-limit test (#9501)
Some checks are pending
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Build (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
CICD / Build/SELinux-Stubs (Non-Linux) (push) Blocked by required conditions
CICD / Safe Traversal Security Check (push) Blocked by required conditions
GnuTests / Run GNU tests (native) (push) Waiting to run
GnuTests / Run GNU tests (SELinux) (push) Waiting to run
GnuTests / Aggregate GNU test results (push) Blocked by required conditions
Android / Test builds (push) Waiting to run
Benchmarks / Run benchmarks (CodSpeed) (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
Devcontainer / Verify devcontainer (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
OpenBSD / Style and Lint (push) Waiting to run
OpenBSD / Tests (push) Waiting to run
WSL2 / Test (push) Waiting to run

* sort: add legacy +POS/-POS parsing for GNU compat

Support GNU’s obsolescent +POS1 [-POS2] syntax by translating it to -k
before clap parses args, gated by _POSIX2_VERSION. Adds tests for accept
and reject cases to ensure sort-field-limit GNU test passes.

* sort: align legacy key tests with GNU field limit

* sort: rename legacy max-field test for clarity

* Simplify legacy key parsing inputs

* Inline legacy key end serialization

* Use starts_with for legacy arg digit check
This commit is contained in:
karanabe 2025-12-15 17:59:56 +09:00 committed by GitHub
parent 64203e3098
commit 06d843fe19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 174 additions and 2 deletions

View file

@ -7,7 +7,7 @@
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html
// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef
mod buffer_hint;
mod check;
@ -51,6 +51,7 @@ use uucore::line_ending::LineEnding;
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
use uucore::parser::parse_size::{ParseSizeError, Parser};
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
use uucore::posix::{MODERN, TRADITIONAL};
use uucore::show_error;
use uucore::translate;
use uucore::version_cmp::version_cmp;
@ -1085,6 +1086,146 @@ fn get_rlimit() -> UResult<usize> {
}
const STDIN_FILE: &str = "-";
/// Legacy `+POS1 [-POS2]` syntax is permitted unless `_POSIX2_VERSION` is in
/// the [TRADITIONAL, MODERN) range (matches GNU behaviour).
fn allows_traditional_usage() -> bool {
!matches!(uucore::posix::posix_version(), Some(ver) if (TRADITIONAL..MODERN).contains(&ver))
}
#[derive(Debug, Clone)]
struct LegacyKeyPart {
field: usize,
char_pos: usize,
opts: String,
}
fn parse_usize_or_max(num: &str) -> Option<usize> {
match num.parse::<usize>() {
Ok(v) => Some(v),
Err(e) if *e.kind() == IntErrorKind::PosOverflow => Some(usize::MAX),
Err(_) => None,
}
}
fn parse_legacy_part(spec: &str) -> Option<LegacyKeyPart> {
let idx = spec.chars().take_while(|c| c.is_ascii_digit()).count();
if idx == 0 {
return None;
}
let field = parse_usize_or_max(&spec[..idx])?;
let mut char_pos = 0;
let mut rest = &spec[idx..];
if let Some(stripped) = rest.strip_prefix('.') {
let char_idx = stripped.chars().take_while(|c| c.is_ascii_digit()).count();
if char_idx == 0 {
return None;
}
char_pos = parse_usize_or_max(&stripped[..char_idx])?;
rest = &stripped[char_idx..];
}
Some(LegacyKeyPart {
field,
char_pos,
opts: rest.to_string(),
})
}
/// Convert legacy +POS1 [-POS2] into a `-k` key specification using saturating arithmetic.
fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String {
let start_field = from.field.saturating_add(1);
let start_char = from.char_pos.saturating_add(1);
let mut keydef = format!(
"{}{}{}",
start_field,
if from.char_pos == 0 {
String::new()
} else {
format!(".{start_char}")
},
from.opts
);
if let Some(to) = to {
let end_field = if to.char_pos == 0 {
// When the end character index is zero, GNU keeps the field number as-is.
// Clamp to 1 to avoid generating an invalid field 0.
to.field.max(1)
} else {
to.field.saturating_add(1)
};
keydef.push(',');
keydef.push_str(&end_field.to_string());
if to.char_pos != 0 {
keydef.push('.');
keydef.push_str(&to.char_pos.to_string());
}
keydef.push_str(&to.opts);
}
keydef
}
/// Preprocess argv to handle legacy +POS1 [-POS2] syntax by converting it into -k forms
/// before clap sees the arguments.
fn preprocess_legacy_args<I>(args: I) -> Vec<OsString>
where
I: IntoIterator,
I::Item: Into<OsString>,
{
if !allows_traditional_usage() {
return args.into_iter().map(Into::into).collect();
}
let mut processed = Vec::new();
let mut iter = args.into_iter().map(Into::into).peekable();
while let Some(arg) = iter.next() {
if arg == "--" {
processed.push(arg);
processed.extend(iter);
break;
}
let as_str = arg.to_string_lossy();
if let Some(from_spec) = as_str.strip_prefix('+') {
if let Some(from) = parse_legacy_part(from_spec) {
let mut to_part = None;
let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string());
if let Some(next_str) = next_candidate {
if let Some(stripped) = next_str.strip_prefix('-') {
if stripped.starts_with(|c: char| c.is_ascii_digit()) {
let next_arg = iter.next().unwrap();
if let Some(parsed) = parse_legacy_part(stripped) {
to_part = Some(parsed);
} else {
processed.push(arg);
processed.push(next_arg);
continue;
}
}
}
}
let keydef = legacy_key_to_k(&from, to_part.as_ref());
processed.push(OsString::from(format!("-k{keydef}")));
continue;
}
}
processed.push(arg);
}
processed
}
#[cfg(target_os = "linux")]
const LINUX_BATCH_DIVISOR: usize = 4;
#[cfg(target_os = "linux")]
@ -1116,7 +1257,11 @@ fn default_merge_batch_size() -> usize {
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let mut settings = GlobalSettings::default();
let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?;
let matches = uucore::clap_localization::handle_clap_result_with_exit_code(
uu_app(),
preprocess_legacy_args(args),
2,
)?;
// Prevent -o/--output to be specified multiple times
if matches

View file

@ -107,6 +107,33 @@ fn test_invalid_buffer_size() {
}
}
#[test]
fn test_legacy_plus_minus_accepts_when_modern_posix2() {
let size_max = usize::MAX;
let (at, mut ucmd) = at_and_ucmd!();
at.write("input.txt", "aa\nbb\n");
ucmd.env("_POSIX2_VERSION", "200809")
.arg(format!("+0.{size_max}R"))
.arg("input.txt")
.succeeds()
.stdout_is("aa\nbb\n");
}
#[test]
fn test_legacy_plus_minus_accepts_with_size_max() {
let size_max = usize::MAX;
let (at, mut ucmd) = at_and_ucmd!();
at.write("input.txt", "aa\nbb\n");
ucmd.env("_POSIX2_VERSION", "200809")
.arg("+1")
.arg(format!("-1.{size_max}R"))
.arg("input.txt")
.succeeds()
.stdout_is("aa\nbb\n");
}
#[test]
fn test_ext_sort_stable() {
new_ucmd!()