diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 46b2f9daa..0f38bed05 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -123,7 +123,7 @@ default = [] # * non-default features backup-control = [] colors = [] -checksum = ["data-encoding", "quoting-style", "sum"] +checksum = ["quoting-style", "sum", "base64-simd"] encoding = ["data-encoding", "data-encoding-macro", "z85", "base64-simd"] entries = ["libc"] extendedbigdecimal = ["bigdecimal", "num-traits"] @@ -171,6 +171,7 @@ sum = [ "blake3", "sm3", "crc-fast", + "data-encoding", ] update-control = ["parser"] utf8 = [] diff --git a/src/uucore/src/lib/features/checksum/compute.rs b/src/uucore/src/lib/features/checksum/compute.rs index e91c54166..471e8c66a 100644 --- a/src/uucore/src/lib/features/checksum/compute.rs +++ b/src/uucore/src/lib/features/checksum/compute.rs @@ -13,7 +13,8 @@ use std::path::Path; use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader, escape_filename}; use crate::error::{FromIo, UResult, USimpleError}; use crate::line_ending::LineEnding; -use crate::{encoding, show, translate}; +use crate::sum::DigestOutput; +use crate::{show, translate}; /// Use the same buffer size as GNU when reading a file to create a checksum /// from it: 32 KiB. @@ -139,10 +140,11 @@ pub fn figure_out_output_format( fn print_legacy_checksum( options: &ChecksumComputeOptions, filename: &OsStr, - sum: &str, + sum: &DigestOutput, size: usize, ) -> UResult<()> { debug_assert!(options.algo_kind.is_legacy()); + debug_assert!(matches!(sum, DigestOutput::U16(_) | DigestOutput::Crc(_))); let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul { (filename.to_string_lossy().to_string(), "") @@ -150,28 +152,24 @@ fn print_legacy_checksum( escape_filename(filename) }; - print!("{prefix}"); - // Print the sum - match options.algo_kind { - SizedAlgoKind::Sysv => print!( - "{} {}", - sum.parse::().unwrap(), + match (options.algo_kind, sum) { + (SizedAlgoKind::Sysv, DigestOutput::U16(sum)) => print!( + "{prefix}{sum} {}", size.div_ceil(options.algo_kind.bitlen()), ), - SizedAlgoKind::Bsd => { + (SizedAlgoKind::Bsd, DigestOutput::U16(sum)) => { // The BSD checksum output is 5 digit integer let bsd_width = 5; print!( - "{:0bsd_width$} {:bsd_width$}", - sum.parse::().unwrap(), + "{prefix}{sum:0bsd_width$} {:bsd_width$}", size.div_ceil(options.algo_kind.bitlen()), ); } - SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { - print!("{sum} {size}"); + (SizedAlgoKind::Crc | SizedAlgoKind::Crc32b, DigestOutput::Crc(sum)) => { + print!("{prefix}{sum} {size}"); } - _ => unreachable!("Not a legacy algorithm"), + (algo, output) => unreachable!("Bug: Invalid legacy checksum ({algo:?}, {output:?})"), } // Print the filename after a space if not stdin @@ -284,49 +282,39 @@ where let mut digest = options.algo_kind.create_digest(); - let (sum_hex, sz) = digest_reader( - &mut digest, - &mut file, - options.binary, - options.algo_kind.bitlen(), - ) - .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; + let (digest_output, sz) = digest_reader(&mut digest, &mut file, options.binary) + .map_err_context(|| translate!("cksum-error-failed-to-read-input"))?; // Encodes the sum if df is Base64, leaves as-is otherwise. - let encode_sum = |sum: String, df: DigestFormat| { + let encode_sum = |sum: DigestOutput, df: DigestFormat| { if df.is_base64() { - encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap()) + sum.to_base64() } else { - sum + sum.to_hex() } }; match options.output_format { OutputFormat::Raw => { - let bytes = match options.algo_kind { - SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => { - sum_hex.parse::().unwrap().to_be_bytes().to_vec() - } - _ => hex::decode(sum_hex).unwrap(), - }; // Cannot handle multiple files anyway, output immediately. - io::stdout().write_all(&bytes)?; + digest_output.write_raw(io::stdout())?; return Ok(()); } OutputFormat::Legacy => { - print_legacy_checksum(&options, filename, &sum_hex, sz)?; + print_legacy_checksum(&options, filename, &digest_output, sz)?; } OutputFormat::Tagged(digest_format) => { - print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?; + print_tagged_checksum( + &options, + filename, + &encode_sum(digest_output, digest_format)?, + )?; } OutputFormat::Untagged(digest_format, reading_mode) => { print_untagged_checksum( &options, filename, - &encode_sum(sum_hex, digest_format), + &encode_sum(digest_output, digest_format)?, reading_mode, )?; } diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs index 87c8836fd..5339f833f 100644 --- a/src/uucore/src/lib/features/checksum/mod.rs +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -15,8 +15,8 @@ use thiserror::Error; use crate::error::{UError, UResult}; use crate::show_error; use crate::sum::{ - Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256, - Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, + Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestOutput, DigestWriter, Md5, Sha1, Sha3_224, + Sha3_256, Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV, }; pub mod compute; @@ -420,8 +420,7 @@ pub fn digest_reader( digest: &mut Box, reader: &mut T, binary: bool, - output_bits: usize, -) -> io::Result<(String, usize)> { +) -> io::Result<(DigestOutput, usize)> { digest.reset(); // Read bytes from `reader` and write those bytes to `digest`. @@ -440,14 +439,7 @@ pub fn digest_reader( let output_size = std::io::copy(reader, &mut digest_writer)? as usize; digest_writer.finalize(); - if digest.output_bits() > 0 { - Ok((digest.result_str(), output_size)) - } else { - // Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016) - let mut bytes = vec![0; output_bits.div_ceil(8)]; - digest.hash_finalize(&mut bytes); - Ok((hex::encode(bytes), output_size)) - } + Ok((digest.result(), output_size)) } /// Calculates the length of the digest. diff --git a/src/uucore/src/lib/features/checksum/validate.rs b/src/uucore/src/lib/features/checksum/validate.rs index 1869d91bf..06bfd6634 100644 --- a/src/uucore/src/lib/features/checksum/validate.rs +++ b/src/uucore/src/lib/features/checksum/validate.rs @@ -660,16 +660,11 @@ fn compute_and_check_digest_from_file( // TODO: improve function signature to use ReadingMode instead of binary bool // Set binary to false because --binary is not supported with --check - let (calculated_checksum, _) = digest_reader( - &mut digest, - &mut file_reader, - /* binary */ false, - algo.bitlen(), - ) - .unwrap(); + let (calculated_checksum, _) = + digest_reader(&mut digest, &mut file_reader, /* binary */ false).unwrap(); // Do the checksum validation - let checksum_correct = expected_checksum == calculated_checksum; + let checksum_correct = expected_checksum == calculated_checksum.to_hex()?; print_file_report( std::io::stdout(), filename, diff --git a/src/uucore/src/lib/features/sum.rs b/src/uucore/src/lib/features/sum.rs index e517a03fc..66fb752ab 100644 --- a/src/uucore/src/lib/features/sum.rs +++ b/src/uucore/src/lib/features/sum.rs @@ -12,12 +12,52 @@ //! [`DigestWriter`] struct provides a wrapper around [`Digest`] that //! implements the [`Write`] trait, for use in situations where calling //! [`write`] would be useful. -use std::io::Write; -use hex::encode; +use std::io::{self, Write}; + +use data_encoding::BASE64; + #[cfg(windows)] use memchr::memmem; +use crate::error::{UResult, USimpleError}; + +/// Represents the output of a checksum computation. +#[derive(Debug)] +pub enum DigestOutput { + /// Varying-size output + Vec(Vec), + /// Legacy output for Crc and Crc32B modes + Crc(u32), + /// Legacy output for Sysv and BSD modes + U16(u16), +} + +impl DigestOutput { + pub fn write_raw(&self, mut w: impl std::io::Write) -> io::Result<()> { + match self { + Self::Vec(buf) => w.write_all(buf), + // For legacy outputs, print them in big endian + Self::Crc(n) => w.write_all(&n.to_be_bytes()), + Self::U16(n) => w.write_all(&n.to_be_bytes()), + } + } + + pub fn to_hex(&self) -> UResult { + match self { + Self::Vec(buf) => Ok(hex::encode(buf)), + _ => Err(USimpleError::new(1, "Legacy output cannot be encoded")), + } + } + + pub fn to_base64(&self) -> UResult { + match self { + Self::Vec(buf) => Ok(BASE64.encode(buf)), + _ => Err(USimpleError::new(1, "Legacy output cannot be encoded")), + } + } +} + pub trait Digest { fn new() -> Self where @@ -29,10 +69,11 @@ pub trait Digest { fn output_bytes(&self) -> usize { self.output_bits().div_ceil(8) } - fn result_str(&mut self) -> String { + + fn result(&mut self) -> DigestOutput { let mut buf: Vec = vec![0; self.output_bytes()]; self.hash_finalize(&mut buf); - encode(buf) + DigestOutput::Vec(buf) } } @@ -167,10 +208,12 @@ impl Digest for Crc { out.copy_from_slice(&self.digest.finalize().to_ne_bytes()); } - fn result_str(&mut self) -> String { + fn result(&mut self) -> DigestOutput { let mut out: [u8; 8] = [0; 8]; self.hash_finalize(&mut out); - u64::from_ne_bytes(out).to_string() + + let x = u64::from_ne_bytes(out); + DigestOutput::Crc((x & (u32::MAX as u64)) as u32) } fn reset(&mut self) { @@ -214,10 +257,10 @@ impl Digest for CRC32B { 32 } - fn result_str(&mut self) -> String { + fn result(&mut self) -> DigestOutput { let mut out = [0; 4]; self.hash_finalize(&mut out); - format!("{}", u32::from_be_bytes(out)) + DigestOutput::Crc(u32::from_be_bytes(out)) } } @@ -240,10 +283,10 @@ impl Digest for Bsd { out.copy_from_slice(&self.state.to_ne_bytes()); } - fn result_str(&mut self) -> String { - let mut _out: Vec = vec![0; 2]; + fn result(&mut self) -> DigestOutput { + let mut _out = [0; 2]; self.hash_finalize(&mut _out); - format!("{}", self.state) + DigestOutput::U16(self.state) } fn reset(&mut self) { @@ -275,10 +318,10 @@ impl Digest for SysV { out.copy_from_slice(&(self.state as u16).to_ne_bytes()); } - fn result_str(&mut self) -> String { - let mut _out: Vec = vec![0; 2]; + fn result(&mut self) -> DigestOutput { + let mut _out = [0; 2]; self.hash_finalize(&mut _out); - format!("{}", self.state) + DigestOutput::U16((self.state & (u16::MAX as u32)) as u16) } fn reset(&mut self) { @@ -292,7 +335,7 @@ impl Digest for SysV { // Implements the Digest trait for sha2 / sha3 algorithms with fixed output macro_rules! impl_digest_common { - ($algo_type: ty, $size: expr) => { + ($algo_type: ty, $size: literal) => { impl Digest for $algo_type { fn new() -> Self { Self(Default::default()) @@ -319,7 +362,7 @@ macro_rules! impl_digest_common { // Implements the Digest trait for sha2 / sha3 algorithms with variable output macro_rules! impl_digest_shake { - ($algo_type: ty) => { + ($algo_type: ty, $output_bits: literal) => { impl Digest for $algo_type { fn new() -> Self { Self(Default::default()) @@ -338,7 +381,13 @@ macro_rules! impl_digest_shake { } fn output_bits(&self) -> usize { - 0 + $output_bits + } + + fn result(&mut self) -> DigestOutput { + let mut bytes = vec![0; self.output_bits().div_ceil(8)]; + self.hash_finalize(&mut bytes); + DigestOutput::Vec(bytes) } } }; @@ -368,8 +417,8 @@ impl_digest_common!(Sha3_512, 512); pub struct Shake128(sha3::Shake128); pub struct Shake256(sha3::Shake256); -impl_digest_shake!(Shake128); -impl_digest_shake!(Shake256); +impl_digest_shake!(Shake128, 256); +impl_digest_shake!(Shake256, 512); /// A struct that writes to a digest. /// @@ -501,14 +550,14 @@ mod tests { writer_crlf.write_all(b"\r").unwrap(); writer_crlf.write_all(b"\n").unwrap(); writer_crlf.finalize(); - let result_crlf = digest.result_str(); + let result_crlf = digest.result(); // We expect "\r\n" to be replaced with "\n" in text mode on Windows. let mut digest = Box::new(Md5::new()) as Box; let mut writer_lf = DigestWriter::new(&mut digest, false); writer_lf.write_all(b"\n").unwrap(); writer_lf.finalize(); - let result_lf = digest.result_str(); + let result_lf = digest.result(); assert_eq!(result_crlf, result_lf); }