Consolidate legacy argument parsing for head/tail

2025-12-23 08:47:37 +00:00 · 2025-12-19 16:29:32 +01:00 · 2025-12-19 16:29:32 +01:00 · ac487dee94
commit ac487dee94
parent 1fca82965d
4 changed files with 247 additions and 45 deletions
--- a/src/uu/head/src/parse.rs
+++ b/src/uu/head/src/parse.rs
@ -4,7 +4,8 @@
 // file that was distributed with this source code.

 use std::ffi::OsString;
-use uucore::parser::parse_size::{ParseSizeError, parse_size_u64_max};
+use uucore::parser::parse_signed_num::{SignPrefix, parse_signed_num_max};
+use uucore::parser::parse_size::ParseSizeError;

 #[derive(PartialEq, Eq, Debug)]
 pub struct ParseError;
@ -107,30 +108,12 @@ fn process_num_block(
 }

 /// Parses an -c or -n argument,
-/// the bool specifies whether to read from the end
+/// the bool specifies whether to read from the end (all but last N)
 pub fn parse_num(src: &str) -> Result<(u64, bool), ParseSizeError> {
-    let mut size_string = src.trim();
-    let mut all_but_last = false;
-
-    if let Some(c) = size_string.chars().next() {
-        if c == '+' || c == '-' {
-            // head: '+' is not documented (8.32 man pages)
-            size_string = &size_string[1..];
-            if c == '-' {
-                all_but_last = true;
-            }
-        }
-    } else {
-        return Err(ParseSizeError::ParseFailure(src.to_string()));
-    }
-
-    // remove leading zeros so that size is interpreted as decimal, not octal
-    let trimmed_string = size_string.trim_start_matches('0');
-    if trimmed_string.is_empty() {
-        Ok((0, all_but_last))
-    } else {
-        parse_size_u64_max(trimmed_string).map(|n| (n, all_but_last))
-    }
+    let result = parse_signed_num_max(src)?;
+    // head: '-' means "all but last N"
+    let all_but_last = result.sign == Some(SignPrefix::Minus);
+    Ok((result.value, all_but_last))
 }

 #[cfg(test)]
--- a/src/uu/tail/src/args.rs
+++ b/src/uu/tail/src/args.rs
@ -13,7 +13,8 @@ use std::ffi::OsString;
 use std::io::IsTerminal;
 use std::time::Duration;
 use uucore::error::{UResult, USimpleError, UUsageError};
-use uucore::parser::parse_size::{ParseSizeError, parse_size_u64};
+use uucore::parser::parse_signed_num::{SignPrefix, parse_signed_num};
+use uucore::parser::parse_size::ParseSizeError;
 use uucore::parser::parse_time;
 use uucore::parser::shortcut_value_parser::ShortcutValueParser;
 use uucore::translate;
@ -386,27 +387,15 @@ pub fn parse_obsolete(arg: &OsString, input: Option<&OsString>) -> UResult<Optio
 }

 fn parse_num(src: &str) -> Result<Signum, ParseSizeError> {
-    let mut size_string = src.trim();
-    let mut starting_with = false;
+    let result = parse_signed_num(src)?;
+    // tail: '+' means "starting from line/byte N", default/'-' means "last N"
+    let is_plus = result.sign == Some(SignPrefix::Plus);

-    if let Some(c) = size_string.chars().next() {
-        if c == '+' || c == '-' {
-            // tail: '-' is not documented (8.32 man pages)
-            size_string = &size_string[1..];
-            if c == '+' {
-                starting_with = true;
-            }
-        }
-    }
-
-    match parse_size_u64(size_string) {
-        Ok(n) => match (n, starting_with) {
-            (0, true) => Ok(Signum::PlusZero),
-            (0, false) => Ok(Signum::MinusZero),
-            (n, true) => Ok(Signum::Positive(n)),
-            (n, false) => Ok(Signum::Negative(n)),
-        },
-        Err(_) => Err(ParseSizeError::ParseFailure(size_string.to_string())),
+    match (result.value, is_plus) {
+        (0, true) => Ok(Signum::PlusZero),
+        (0, false) => Ok(Signum::MinusZero),
+        (n, true) => Ok(Signum::Positive(n)),
+        (n, false) => Ok(Signum::Negative(n)),
    }
 }

--- a/src/uucore/src/lib/features/parser/mod.rs
+++ b/src/uucore/src/lib/features/parser/mod.rs
@ -9,6 +9,8 @@ pub mod num_parser;
 #[cfg(any(feature = "parser", feature = "parser-glob"))]
 pub mod parse_glob;
 #[cfg(any(feature = "parser", feature = "parser-size"))]
+pub mod parse_signed_num;
+#[cfg(any(feature = "parser", feature = "parser-size"))]
 pub mod parse_size;
 #[cfg(any(feature = "parser", feature = "parser-num"))]
 pub mod parse_time;
--- a/src/uucore/src/lib/features/parser/parse_signed_num.rs
+++ b/src/uucore/src/lib/features/parser/parse_signed_num.rs
@ -0,0 +1,228 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+//! Parser for signed numeric arguments used by head, tail, and similar utilities.
+//!
+//! These utilities accept arguments like `-5`, `+10`, `-100K` where the leading
+//! sign indicates different behavior (e.g., "first N" vs "last N" vs "starting from N").
+
+use super::parse_size::{ParseSizeError, parse_size_u64, parse_size_u64_max};
+
+/// The sign prefix found on a numeric argument.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SignPrefix {
+    /// Plus sign prefix (e.g., "+10")
+    Plus,
+    /// Minus sign prefix (e.g., "-10")
+    Minus,
+}
+
+/// A parsed signed numeric argument.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct SignedNum {
+    /// The numeric value
+    pub value: u64,
+    /// The sign prefix that was present, if any
+    pub sign: Option<SignPrefix>,
+}
+
+impl SignedNum {
+    /// Returns true if the value is zero.
+    pub fn is_zero(&self) -> bool {
+        self.value == 0
+    }
+
+    /// Returns true if a plus sign was present.
+    pub fn has_plus(&self) -> bool {
+        self.sign == Some(SignPrefix::Plus)
+    }
+
+    /// Returns true if a minus sign was present.
+    pub fn has_minus(&self) -> bool {
+        self.sign == Some(SignPrefix::Minus)
+    }
+}
+
+/// Parse a signed numeric argument, clamping to u64::MAX on overflow.
+///
+/// This function parses strings like "10", "+5K", "-100M" where:
+/// - The optional leading `+` or `-` indicates direction/behavior
+/// - The number can have size suffixes (K, M, G, etc.)
+///
+/// # Arguments
+/// * `src` - The string to parse
+///
+/// # Returns
+/// * `Ok(SignedNum)` - The parsed value and sign
+/// * `Err(ParseSizeError)` - If the string cannot be parsed
+///
+/// # Examples
+/// ```ignore
+/// use uucore::parser::parse_signed_num::parse_signed_num_max;
+///
+/// let result = parse_signed_num_max("10").unwrap();
+/// assert_eq!(result.value, 10);
+/// assert_eq!(result.sign, None);
+///
+/// let result = parse_signed_num_max("+5K").unwrap();
+/// assert_eq!(result.value, 5 * 1024);
+/// assert_eq!(result.sign, Some(SignPrefix::Plus));
+///
+/// let result = parse_signed_num_max("-100").unwrap();
+/// assert_eq!(result.value, 100);
+/// assert_eq!(result.sign, Some(SignPrefix::Minus));
+/// ```
+pub fn parse_signed_num_max(src: &str) -> Result<SignedNum, ParseSizeError> {
+    let (sign, size_string) = strip_sign_prefix(src);
+
+    // Empty string after stripping sign is an error
+    if size_string.is_empty() {
+        return Err(ParseSizeError::ParseFailure(src.to_string()));
+    }
+
+    // Remove leading zeros so size is interpreted as decimal, not octal
+    let trimmed = size_string.trim_start_matches('0');
+    let value = if trimmed.is_empty() {
+        // All zeros (e.g., "000" or "0")
+        0
+    } else {
+        parse_size_u64_max(trimmed)?
+    };
+
+    Ok(SignedNum { value, sign })
+}
+
+/// Parse a signed numeric argument, returning error on overflow.
+///
+/// Same as [`parse_signed_num_max`] but returns an error instead of clamping
+/// when the value overflows u64.
+///
+/// Note: On parse failure, this returns an error with the raw string (without quotes)
+/// to allow callers to format the error message as needed.
+pub fn parse_signed_num(src: &str) -> Result<SignedNum, ParseSizeError> {
+    let (sign, size_string) = strip_sign_prefix(src);
+
+    // Empty string after stripping sign is an error
+    if size_string.is_empty() {
+        return Err(ParseSizeError::ParseFailure(src.to_string()));
+    }
+
+    // Use parse_size_u64 but on failure, create our own error with the raw string
+    // (without quotes) so callers can format it as needed
+    let value = parse_size_u64(size_string)
+        .map_err(|_| ParseSizeError::ParseFailure(size_string.to_string()))?;
+
+    Ok(SignedNum { value, sign })
+}
+
+/// Strip the sign prefix from a string and return both the sign and remaining string.
+fn strip_sign_prefix(src: &str) -> (Option<SignPrefix>, &str) {
+    let trimmed = src.trim();
+
+    if let Some(rest) = trimmed.strip_prefix('+') {
+        (Some(SignPrefix::Plus), rest)
+    } else if let Some(rest) = trimmed.strip_prefix('-') {
+        (Some(SignPrefix::Minus), rest)
+    } else {
+        (None, trimmed)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_no_sign() {
+        let result = parse_signed_num_max("10").unwrap();
+        assert_eq!(result.value, 10);
+        assert_eq!(result.sign, None);
+        assert!(!result.has_plus());
+        assert!(!result.has_minus());
+    }
+
+    #[test]
+    fn test_plus_sign() {
+        let result = parse_signed_num_max("+10").unwrap();
+        assert_eq!(result.value, 10);
+        assert_eq!(result.sign, Some(SignPrefix::Plus));
+        assert!(result.has_plus());
+        assert!(!result.has_minus());
+    }
+
+    #[test]
+    fn test_minus_sign() {
+        let result = parse_signed_num_max("-10").unwrap();
+        assert_eq!(result.value, 10);
+        assert_eq!(result.sign, Some(SignPrefix::Minus));
+        assert!(!result.has_plus());
+        assert!(result.has_minus());
+    }
+
+    #[test]
+    fn test_with_suffix() {
+        let result = parse_signed_num_max("+5K").unwrap();
+        assert_eq!(result.value, 5 * 1024);
+        assert!(result.has_plus());
+
+        let result = parse_signed_num_max("-2M").unwrap();
+        assert_eq!(result.value, 2 * 1024 * 1024);
+        assert!(result.has_minus());
+    }
+
+    #[test]
+    fn test_zero() {
+        let result = parse_signed_num_max("0").unwrap();
+        assert_eq!(result.value, 0);
+        assert!(result.is_zero());
+
+        let result = parse_signed_num_max("+0").unwrap();
+        assert_eq!(result.value, 0);
+        assert!(result.is_zero());
+        assert!(result.has_plus());
+
+        let result = parse_signed_num_max("-0").unwrap();
+        assert_eq!(result.value, 0);
+        assert!(result.is_zero());
+        assert!(result.has_minus());
+    }
+
+    #[test]
+    fn test_leading_zeros() {
+        let result = parse_signed_num_max("007").unwrap();
+        assert_eq!(result.value, 7);
+
+        let result = parse_signed_num_max("+007").unwrap();
+        assert_eq!(result.value, 7);
+        assert!(result.has_plus());
+
+        let result = parse_signed_num_max("000").unwrap();
+        assert_eq!(result.value, 0);
+    }
+
+    #[test]
+    fn test_whitespace() {
+        let result = parse_signed_num_max("  10  ").unwrap();
+        assert_eq!(result.value, 10);
+
+        let result = parse_signed_num_max("  +10  ").unwrap();
+        assert_eq!(result.value, 10);
+        assert!(result.has_plus());
+    }
+
+    #[test]
+    fn test_overflow_max() {
+        // Should clamp to u64::MAX instead of error
+        let result = parse_signed_num_max("99999999999999999999999999").unwrap();
+        assert_eq!(result.value, u64::MAX);
+    }
+
+    #[test]
+    fn test_invalid() {
+        assert!(parse_signed_num_max("").is_err());
+        assert!(parse_signed_num_max("abc").is_err());
+        assert!(parse_signed_num_max("++10").is_err());
+    }
+}