From ac487dee941a7168eba07b33709743535ec98163 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 19 Dec 2025 16:29:32 +0100 Subject: [PATCH] Consolidate legacy argument parsing for head/tail --- src/uu/head/src/parse.rs | 31 +-- src/uu/tail/src/args.rs | 31 +-- src/uucore/src/lib/features/parser/mod.rs | 2 + .../lib/features/parser/parse_signed_num.rs | 228 ++++++++++++++++++ 4 files changed, 247 insertions(+), 45 deletions(-) create mode 100644 src/uucore/src/lib/features/parser/parse_signed_num.rs diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index ed1345d16..54025a89d 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -4,7 +4,8 @@ // file that was distributed with this source code. use std::ffi::OsString; -use uucore::parser::parse_size::{ParseSizeError, parse_size_u64_max}; +use uucore::parser::parse_signed_num::{SignPrefix, parse_signed_num_max}; +use uucore::parser::parse_size::ParseSizeError; #[derive(PartialEq, Eq, Debug)] pub struct ParseError; @@ -107,30 +108,12 @@ fn process_num_block( } /// Parses an -c or -n argument, -/// the bool specifies whether to read from the end +/// the bool specifies whether to read from the end (all but last N) pub fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> { - let mut size_string = src.trim(); - let mut all_but_last = false; - - if let Some(c) = size_string.chars().next() { - if c == '+' || c == '-' { - // head: '+' is not documented (8.32 man pages) - size_string = &size_string[1..]; - if c == '-' { - all_but_last = true; - } - } - } else { - return Err(ParseSizeError::ParseFailure(src.to_string())); - } - - // remove leading zeros so that size is interpreted as decimal, not octal - let trimmed_string = size_string.trim_start_matches('0'); - if trimmed_string.is_empty() { - Ok((0, all_but_last)) - } else { - parse_size_u64_max(trimmed_string).map(|n| (n, all_but_last)) - } + let result = parse_signed_num_max(src)?; + // head: '-' means "all but last N" + let all_but_last = result.sign == Some(SignPrefix::Minus); + Ok((result.value, all_but_last)) } #[cfg(test)] diff --git a/src/uu/tail/src/args.rs b/src/uu/tail/src/args.rs index ef53b3943..16f4c765e 100644 --- a/src/uu/tail/src/args.rs +++ b/src/uu/tail/src/args.rs @@ -13,7 +13,8 @@ use std::ffi::OsString; use std::io::IsTerminal; use std::time::Duration; use uucore::error::{UResult, USimpleError, UUsageError}; -use uucore::parser::parse_size::{ParseSizeError, parse_size_u64}; +use uucore::parser::parse_signed_num::{SignPrefix, parse_signed_num}; +use uucore::parser::parse_size::ParseSizeError; use uucore::parser::parse_time; use uucore::parser::shortcut_value_parser::ShortcutValueParser; use uucore::translate; @@ -386,27 +387,15 @@ pub fn parse_obsolete(arg: &OsString, input: Option<&OsString>) -> UResult Result { - let mut size_string = src.trim(); - let mut starting_with = false; + let result = parse_signed_num(src)?; + // tail: '+' means "starting from line/byte N", default/'-' means "last N" + let is_plus = result.sign == Some(SignPrefix::Plus); - if let Some(c) = size_string.chars().next() { - if c == '+' || c == '-' { - // tail: '-' is not documented (8.32 man pages) - size_string = &size_string[1..]; - if c == '+' { - starting_with = true; - } - } - } - - match parse_size_u64(size_string) { - Ok(n) => match (n, starting_with) { - (0, true) => Ok(Signum::PlusZero), - (0, false) => Ok(Signum::MinusZero), - (n, true) => Ok(Signum::Positive(n)), - (n, false) => Ok(Signum::Negative(n)), - }, - Err(_) => Err(ParseSizeError::ParseFailure(size_string.to_string())), + match (result.value, is_plus) { + (0, true) => Ok(Signum::PlusZero), + (0, false) => Ok(Signum::MinusZero), + (n, true) => Ok(Signum::Positive(n)), + (n, false) => Ok(Signum::Negative(n)), } } diff --git a/src/uucore/src/lib/features/parser/mod.rs b/src/uucore/src/lib/features/parser/mod.rs index d2fc27721..d9a6ffb43 100644 --- a/src/uucore/src/lib/features/parser/mod.rs +++ b/src/uucore/src/lib/features/parser/mod.rs @@ -9,6 +9,8 @@ pub mod num_parser; #[cfg(any(feature = "parser", feature = "parser-glob"))] pub mod parse_glob; #[cfg(any(feature = "parser", feature = "parser-size"))] +pub mod parse_signed_num; +#[cfg(any(feature = "parser", feature = "parser-size"))] pub mod parse_size; #[cfg(any(feature = "parser", feature = "parser-num"))] pub mod parse_time; diff --git a/src/uucore/src/lib/features/parser/parse_signed_num.rs b/src/uucore/src/lib/features/parser/parse_signed_num.rs new file mode 100644 index 000000000..82ffcaaca --- /dev/null +++ b/src/uucore/src/lib/features/parser/parse_signed_num.rs @@ -0,0 +1,228 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Parser for signed numeric arguments used by head, tail, and similar utilities. +//! +//! These utilities accept arguments like `-5`, `+10`, `-100K` where the leading +//! sign indicates different behavior (e.g., "first N" vs "last N" vs "starting from N"). + +use super::parse_size::{ParseSizeError, parse_size_u64, parse_size_u64_max}; + +/// The sign prefix found on a numeric argument. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SignPrefix { + /// Plus sign prefix (e.g., "+10") + Plus, + /// Minus sign prefix (e.g., "-10") + Minus, +} + +/// A parsed signed numeric argument. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SignedNum { + /// The numeric value + pub value: u64, + /// The sign prefix that was present, if any + pub sign: Option, +} + +impl SignedNum { + /// Returns true if the value is zero. + pub fn is_zero(&self) -> bool { + self.value == 0 + } + + /// Returns true if a plus sign was present. + pub fn has_plus(&self) -> bool { + self.sign == Some(SignPrefix::Plus) + } + + /// Returns true if a minus sign was present. + pub fn has_minus(&self) -> bool { + self.sign == Some(SignPrefix::Minus) + } +} + +/// Parse a signed numeric argument, clamping to u64::MAX on overflow. +/// +/// This function parses strings like "10", "+5K", "-100M" where: +/// - The optional leading `+` or `-` indicates direction/behavior +/// - The number can have size suffixes (K, M, G, etc.) +/// +/// # Arguments +/// * `src` - The string to parse +/// +/// # Returns +/// * `Ok(SignedNum)` - The parsed value and sign +/// * `Err(ParseSizeError)` - If the string cannot be parsed +/// +/// # Examples +/// ```ignore +/// use uucore::parser::parse_signed_num::parse_signed_num_max; +/// +/// let result = parse_signed_num_max("10").unwrap(); +/// assert_eq!(result.value, 10); +/// assert_eq!(result.sign, None); +/// +/// let result = parse_signed_num_max("+5K").unwrap(); +/// assert_eq!(result.value, 5 * 1024); +/// assert_eq!(result.sign, Some(SignPrefix::Plus)); +/// +/// let result = parse_signed_num_max("-100").unwrap(); +/// assert_eq!(result.value, 100); +/// assert_eq!(result.sign, Some(SignPrefix::Minus)); +/// ``` +pub fn parse_signed_num_max(src: &str) -> Result { + let (sign, size_string) = strip_sign_prefix(src); + + // Empty string after stripping sign is an error + if size_string.is_empty() { + return Err(ParseSizeError::ParseFailure(src.to_string())); + } + + // Remove leading zeros so size is interpreted as decimal, not octal + let trimmed = size_string.trim_start_matches('0'); + let value = if trimmed.is_empty() { + // All zeros (e.g., "000" or "0") + 0 + } else { + parse_size_u64_max(trimmed)? + }; + + Ok(SignedNum { value, sign }) +} + +/// Parse a signed numeric argument, returning error on overflow. +/// +/// Same as [`parse_signed_num_max`] but returns an error instead of clamping +/// when the value overflows u64. +/// +/// Note: On parse failure, this returns an error with the raw string (without quotes) +/// to allow callers to format the error message as needed. +pub fn parse_signed_num(src: &str) -> Result { + let (sign, size_string) = strip_sign_prefix(src); + + // Empty string after stripping sign is an error + if size_string.is_empty() { + return Err(ParseSizeError::ParseFailure(src.to_string())); + } + + // Use parse_size_u64 but on failure, create our own error with the raw string + // (without quotes) so callers can format it as needed + let value = parse_size_u64(size_string) + .map_err(|_| ParseSizeError::ParseFailure(size_string.to_string()))?; + + Ok(SignedNum { value, sign }) +} + +/// Strip the sign prefix from a string and return both the sign and remaining string. +fn strip_sign_prefix(src: &str) -> (Option, &str) { + let trimmed = src.trim(); + + if let Some(rest) = trimmed.strip_prefix('+') { + (Some(SignPrefix::Plus), rest) + } else if let Some(rest) = trimmed.strip_prefix('-') { + (Some(SignPrefix::Minus), rest) + } else { + (None, trimmed) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_sign() { + let result = parse_signed_num_max("10").unwrap(); + assert_eq!(result.value, 10); + assert_eq!(result.sign, None); + assert!(!result.has_plus()); + assert!(!result.has_minus()); + } + + #[test] + fn test_plus_sign() { + let result = parse_signed_num_max("+10").unwrap(); + assert_eq!(result.value, 10); + assert_eq!(result.sign, Some(SignPrefix::Plus)); + assert!(result.has_plus()); + assert!(!result.has_minus()); + } + + #[test] + fn test_minus_sign() { + let result = parse_signed_num_max("-10").unwrap(); + assert_eq!(result.value, 10); + assert_eq!(result.sign, Some(SignPrefix::Minus)); + assert!(!result.has_plus()); + assert!(result.has_minus()); + } + + #[test] + fn test_with_suffix() { + let result = parse_signed_num_max("+5K").unwrap(); + assert_eq!(result.value, 5 * 1024); + assert!(result.has_plus()); + + let result = parse_signed_num_max("-2M").unwrap(); + assert_eq!(result.value, 2 * 1024 * 1024); + assert!(result.has_minus()); + } + + #[test] + fn test_zero() { + let result = parse_signed_num_max("0").unwrap(); + assert_eq!(result.value, 0); + assert!(result.is_zero()); + + let result = parse_signed_num_max("+0").unwrap(); + assert_eq!(result.value, 0); + assert!(result.is_zero()); + assert!(result.has_plus()); + + let result = parse_signed_num_max("-0").unwrap(); + assert_eq!(result.value, 0); + assert!(result.is_zero()); + assert!(result.has_minus()); + } + + #[test] + fn test_leading_zeros() { + let result = parse_signed_num_max("007").unwrap(); + assert_eq!(result.value, 7); + + let result = parse_signed_num_max("+007").unwrap(); + assert_eq!(result.value, 7); + assert!(result.has_plus()); + + let result = parse_signed_num_max("000").unwrap(); + assert_eq!(result.value, 0); + } + + #[test] + fn test_whitespace() { + let result = parse_signed_num_max(" 10 ").unwrap(); + assert_eq!(result.value, 10); + + let result = parse_signed_num_max(" +10 ").unwrap(); + assert_eq!(result.value, 10); + assert!(result.has_plus()); + } + + #[test] + fn test_overflow_max() { + // Should clamp to u64::MAX instead of error + let result = parse_signed_num_max("99999999999999999999999999").unwrap(); + assert_eq!(result.value, u64::MAX); + } + + #[test] + fn test_invalid() { + assert!(parse_signed_num_max("").is_err()); + assert!(parse_signed_num_max("abc").is_err()); + assert!(parse_signed_num_max("++10").is_err()); + } +}