From 3849d91d69f00652ec2e6cf51d29f931ebb34d72 Mon Sep 17 00:00:00 2001 From: Dorian Peron Date: Thu, 6 Nov 2025 16:42:04 +0100 Subject: [PATCH] checksum: Move cksum computation to uucore::checksum --- src/uu/cksum/src/cksum.rs | 262 +----------------- .../src/lib/features/checksum/compute.rs | 246 ++++++++++++++++ src/uucore/src/lib/features/checksum/mod.rs | 1 + 3 files changed, 259 insertions(+), 250 deletions(-) create mode 100644 src/uucore/src/lib/features/checksum/compute.rs diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index a0a5c00df..0a246b8e9 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -8,257 +8,20 @@ use clap::builder::ValueParser; use clap::{Arg, ArgAction, Command}; use std::ffi::{OsStr, OsString}; -use std::fs::File; -use std::io::{BufReader, Read, Write, stdin, stdout}; use std::iter; -use std::path::Path; -use uucore::checksum::validate::{ChecksumOptions, ChecksumVerbose, perform_checksum_validation}; +use uucore::checksum::compute::{ + ChecksumComputeOptions, DigestFormat, OutputFormat, ReadingMode, perform_checksum_computation, +}; +use uucore::checksum::validate::{ + ChecksumValidateOptions, ChecksumVerbose, perform_checksum_validation, +}; use uucore::checksum::{ AlgoKind, ChecksumError, SUPPORTED_ALGORITHMS, SizedAlgoKind, calculate_blake2b_length_str, - digest_reader, sanitize_sha2_sha3_length_str, + sanitize_sha2_sha3_length_str, }; -use uucore::translate; - -use uucore::{ - encoding, - error::{FromIo, UResult, USimpleError}, - format_usage, - line_ending::LineEnding, - os_str_as_bytes, show, - sum::Digest, -}; - -struct Options { - algo_kind: SizedAlgoKind, - digest: Box, - output_format: OutputFormat, - line_ending: LineEnding, -} - -/// Reading mode used to compute digest. -/// -/// On most linux systems, this is irrelevant, as there is no distinction -/// between text and binary files. Refer to GNU's cksum documentation for more -/// information. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ReadingMode { - Binary, - Text, -} - -impl ReadingMode { - #[inline] - fn as_char(&self) -> char { - match self { - Self::Binary => '*', - Self::Text => ' ', - } - } -} - -/// Whether to write the digest as hexadecimal or encoded in base64. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum DigestFormat { - Hexadecimal, - Base64, -} - -impl DigestFormat { - #[inline] - fn is_base64(&self) -> bool { - *self == Self::Base64 - } -} - -/// Holds the representation that shall be used for printing a checksum line -#[derive(Debug, PartialEq, Eq)] -enum OutputFormat { - /// Raw digest - Raw, - - /// Selected for older algorithms which had their custom formatting - /// - /// Default for crc, sysv, bsd - Legacy, - - /// `$ALGO_NAME ($FILENAME) = $DIGEST` - Tagged(DigestFormat), - - /// '$DIGEST $FLAG$FILENAME' - /// where 'flag' depends on the reading mode - /// - /// Default for standalone checksum utilities - Untagged(DigestFormat, ReadingMode), -} - -impl OutputFormat { - #[inline] - fn is_raw(&self) -> bool { - *self == Self::Raw - } -} - -fn print_legacy_checksum( - options: &Options, - filename: &OsStr, - sum: &str, - size: usize, -) -> UResult<()> { - debug_assert!(options.algo_kind.is_legacy()); - - // Print the sum - match options.algo_kind { - SizedAlgoKind::Sysv => print!( - "{} {}", - sum.parse::().unwrap(), - size.div_ceil(options.algo_kind.bitlen()), - ), - SizedAlgoKind::Bsd => { - // The BSD checksum output is 5 digit integer - let bsd_width = 5; - print!( - "{:0bsd_width$} {:bsd_width$}", - sum.parse::().unwrap(), - size.div_ceil(options.algo_kind.bitlen()), - ); - } - SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { - print!("{sum} {size}"); - } - _ => unreachable!("Not a legacy algorithm"), - } - - // Print the filename after a space if not stdin - if filename != "-" { - print!(" "); - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - } - - Ok(()) -} - -fn print_tagged_checksum(options: &Options, filename: &OsStr, sum: &String) -> UResult<()> { - // Print algo name and opening parenthesis. - print!("{} (", options.algo_kind.to_tag()); - - // Print filename - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - - // Print closing parenthesis and sum - print!(") = {sum}"); - - Ok(()) -} - -fn print_untagged_checksum( - filename: &OsStr, - sum: &String, - reading_mode: ReadingMode, -) -> UResult<()> { - // Print checksum and reading mode flag - print!("{sum} {}", reading_mode.as_char()); - - // Print filename - let _dropped_result = stdout().write_all(os_str_as_bytes(filename)?); - - Ok(()) -} - -/// Calculate checksum -/// -/// # Arguments -/// -/// * `options` - CLI options for the assigning checksum algorithm -/// * `files` - A iterator of [`OsStr`] which is a bunch of files that are using for calculating checksum -fn cksum<'a, I>(mut options: Options, files: I) -> UResult<()> -where - I: Iterator, -{ - let mut files = files.peekable(); - - while let Some(filename) = files.next() { - // Check that in raw mode, we are not provided with several files. - if options.output_format.is_raw() && files.peek().is_some() { - return Err(Box::new(ChecksumError::RawMultipleFiles)); - } - - let filepath = Path::new(filename); - let stdin_buf; - let file_buf; - if filepath.is_dir() { - show!(USimpleError::new( - 1, - translate!("cksum-error-is-directory", "file" => filepath.display()) - )); - continue; - } - - // Handle the file input - let mut file = BufReader::new(if filename == "-" { - stdin_buf = stdin(); - Box::new(stdin_buf) as Box - } else { - file_buf = match File::open(filepath) { - Ok(file) => file, - Err(err) => { - show!(err.map_err_context(|| filepath.to_string_lossy().to_string())); - continue; - } - }; - Box::new(file_buf) as Box - }); - - let (sum_hex, sz) = digest_reader( - &mut options.digest, - &mut file, - false, - options.algo_kind.bitlen(), - ) - .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; - - // Encodes the sum if df is Base64, leaves as-is otherwise. - let encode_sum = |sum: String, df: DigestFormat| { - if df.is_base64() { - encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) - } else { - sum - } - }; - - match options.output_format { - OutputFormat::Raw => { - let bytes = match options.algo_kind { - SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - _ => hex::decode(sum_hex).unwrap(), - }; - // Cannot handle multiple files anyway, output immediately. - stdout().write_all(&bytes)?; - return Ok(()); - } - OutputFormat::Legacy => { - print_legacy_checksum(&options, filename, &sum_hex, sz)?; - } - OutputFormat::Tagged(digest_format) => { - print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; - } - OutputFormat::Untagged(digest_format, reading_mode) => { - print_untagged_checksum( - filename, - &encode_sum(sum_hex, digest_format), - reading_mode, - )?; - } - } - - print!("{}", options.line_ending); - } - Ok(()) -} +use uucore::error::UResult; +use uucore::line_ending::LineEnding; +use uucore::{format_usage, translate}; mod options { pub const ALGORITHM: &str = "algorithm"; @@ -455,14 +218,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { matches.get_flag(options::BASE64), ); - let opts = Options { + let opts = ChecksumComputeOptions { algo_kind: algo, - digest: algo.create_digest(), output_format, line_ending, }; - cksum(opts, files)?; + perform_checksum_computation(opts, files)?; Ok(()) } diff --git a/src/uucore/src/lib/features/checksum/compute.rs b/src/uucore/src/lib/features/checksum/compute.rs new file mode 100644 index 000000000..015e9bb0f --- /dev/null +++ b/src/uucore/src/lib/features/checksum/compute.rs @@ -0,0 +1,246 @@ +use std::ffi::OsStr; +use std::fs::File; +use std::io::{self, BufReader, Read, Write}; +use std::path::Path; + +use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader}; +use crate::error::{FromIo, UResult, USimpleError}; +use crate::line_ending::LineEnding; +use crate::{encoding, os_str_as_bytes, show, translate}; + +pub struct ChecksumComputeOptions { + pub algo_kind: SizedAlgoKind, + pub output_format: OutputFormat, + pub line_ending: LineEnding, +} + +/// Reading mode used to compute digest. +/// +/// On most linux systems, this is irrelevant, as there is no distinction +/// between text and binary files. Refer to GNU's cksum documentation for more +/// information. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ReadingMode { + Binary, + Text, +} + +impl ReadingMode { + #[inline] + fn as_char(&self) -> char { + match self { + Self::Binary => '*', + Self::Text => ' ', + } + } +} + +/// Whether to write the digest as hexadecimal or encoded in base64. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DigestFormat { + Hexadecimal, + Base64, +} + +impl DigestFormat { + #[inline] + fn is_base64(&self) -> bool { + *self == Self::Base64 + } +} + +/// Holds the representation that shall be used for printing a checksum line +#[derive(Debug, PartialEq, Eq)] +pub enum OutputFormat { + /// Raw digest + Raw, + + /// Selected for older algorithms which had their custom formatting + /// + /// Default for crc, sysv, bsd + Legacy, + + /// `$ALGO_NAME ($FILENAME) = $DIGEST` + Tagged(DigestFormat), + + /// '$DIGEST $FLAG$FILENAME' + /// where 'flag' depends on the reading mode + /// + /// Default for standalone checksum utilities + Untagged(DigestFormat, ReadingMode), +} + +impl OutputFormat { + #[inline] + fn is_raw(&self) -> bool { + *self == Self::Raw + } +} + +fn print_legacy_checksum( + options: &ChecksumComputeOptions, + filename: &OsStr, + sum: &str, + size: usize, +) -> UResult<()> { + debug_assert!(options.algo_kind.is_legacy()); + + // Print the sum + match options.algo_kind { + SizedAlgoKind::Sysv => print!( + "{} {}", + sum.parse::().unwrap(), + size.div_ceil(options.algo_kind.bitlen()), + ), + SizedAlgoKind::Bsd => { + // The BSD checksum output is 5 digit integer + let bsd_width = 5; + print!( + "{:0bsd_width$} {:bsd_width$}", + sum.parse::().unwrap(), + size.div_ceil(options.algo_kind.bitlen()), + ); + } + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { + print!("{sum} {size}"); + } + _ => unreachable!("Not a legacy algorithm"), + } + + // Print the filename after a space if not stdin + if filename != "-" { + print!(" "); + let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + } + + Ok(()) +} + +fn print_tagged_checksum( + options: &ChecksumComputeOptions, + filename: &OsStr, + sum: &String, +) -> UResult<()> { + // Print algo name and opening parenthesis. + print!("{} (", options.algo_kind.to_tag()); + + // Print filename + let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + + // Print closing parenthesis and sum + print!(") = {sum}"); + + Ok(()) +} + +fn print_untagged_checksum( + filename: &OsStr, + sum: &String, + reading_mode: ReadingMode, +) -> UResult<()> { + // Print checksum and reading mode flag + print!("{sum} {}", reading_mode.as_char()); + + // Print filename + let _dropped_result = io::stdout().write_all(os_str_as_bytes(filename)?); + + Ok(()) +} + +/// Calculate checksum +/// +/// # Arguments +/// +/// * `options` - CLI options for the assigning checksum algorithm +/// * `files` - A iterator of [`OsStr`] which is a bunch of files that are using for calculating checksum +pub fn perform_checksum_computation<'a, I>(options: ChecksumComputeOptions, files: I) -> UResult<()> +where + I: Iterator, +{ + let mut files = files.peekable(); + + while let Some(filename) = files.next() { + // Check that in raw mode, we are not provided with several files. + if options.output_format.is_raw() && files.peek().is_some() { + return Err(Box::new(ChecksumError::RawMultipleFiles)); + } + + let filepath = Path::new(filename); + let stdin_buf; + let file_buf; + if filepath.is_dir() { + show!(USimpleError::new( + 1, + translate!("cksum-error-is-directory", "file" => filepath.display()) + )); + continue; + } + + // Handle the file input + let mut file = BufReader::new(if filename == "-" { + stdin_buf = io::stdin(); + Box::new(stdin_buf) as Box + } else { + file_buf = match File::open(filepath) { + Ok(file) => file, + Err(err) => { + show!(err.map_err_context(|| filepath.to_string_lossy().to_string())); + continue; + } + }; + Box::new(file_buf) as Box + }); + + let mut digest = options.algo_kind.create_digest(); + + let (sum_hex, sz) = digest_reader( + &mut digest, + &mut file, + false, + options.algo_kind.bitlen(), + ) + .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; + + // Encodes the sum if df is Base64, leaves as-is otherwise. + let encode_sum = |sum: String, df: DigestFormat| { + if df.is_base64() { + encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) + } else { + sum + } + }; + + match options.output_format { + OutputFormat::Raw => { + let bytes = match options.algo_kind { + SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { + sum_hex.parse::().unwrap().to_be_bytes().to_vec() + } + SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => { + sum_hex.parse::().unwrap().to_be_bytes().to_vec() + } + _ => hex::decode(sum_hex).unwrap(), + }; + // Cannot handle multiple files anyway, output immediately. + io::stdout().write_all(&bytes)?; + return Ok(()); + } + OutputFormat::Legacy => { + print_legacy_checksum(&options, filename, &sum_hex, sz)?; + } + OutputFormat::Tagged(digest_format) => { + print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; + } + OutputFormat::Untagged(digest_format, reading_mode) => { + print_untagged_checksum( + filename, + &encode_sum(sum_hex, digest_format), + reading_mode, + )?; + } + } + + print!("{}", options.line_ending); + } + Ok(()) +} diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs index 29af7a491..a3b7e53c9 100644 --- a/src/uucore/src/lib/features/checksum/mod.rs +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -21,6 +21,7 @@ use crate::{ }; use thiserror::Error; +pub mod compute; pub mod validate; pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv";